@book{urn,
 author = {Johnson, Norman L. and Kotz, Samuel},
 title = {{Urn} {Models} and {Their} {Application}: {An} {Approach} to {Modern} {Discrete} {Probability} {Theory}},
 publisher = {John Wiley and Sons},
 address = {New York},
 year = {1977},
}

@article{DataStreams2005,
 author = {Muthukrishnan, S.},
 title = {{Data} {Streams}: {Algorithms} and {Applications}},
 journal = {Found. Trends Theor. Comput. Sci.},
 issue_date = {August 2005},
 volume = {1},
 number = {2},
 month = aug,
 year = {2005},
 issn = {1551-305X},
 pages = {117--236},
 numpages = {120},
 doi = {10.1561/0400000002},
 acmid = {1166410},
 publisher = {Now Publishers Inc.},
 address = {Hanover, MA, USA},
} 

@article{SamplingImpact,
 author = {Carela-Espa{\~n}ol, Valent{\'\i}n and Barlet-Ros, Pere and Cabellos-Aparicio, Albert and Sol{\'e}-Pareta, Josep},
 title = {{Analysis} of the {Impact} of {Sampling} on {NetFlow} {Traffic} {Classification}},
 journal = {Comput. Netw.},
 issue_date = {April, 2011},
 volume = {55},
 number = {5},
 month = apr,
 year = {2011},
 issn = {1389-1286},
 pages = {1083--1099},
 numpages = {17},
 doi = {10.1016/j.comnet.2010.11.002},
 acmid = {1953895},
 publisher = {Elsevier North-Holland, Inc.},
 address = {New York, NY, USA},
 keywords = {Machine learning, Network management, Traffic classification},
} 

@article{duffield2004,
author = "Duffield, Nick",
doi = "10.1214/088342304000000206",
fjournal = "Statistical Science",
journal = "Statist. Sci.",
month = aug,
number = "3",
pages = "472--498",
publisher = "The Institute of Mathematical Statistics",
title = "{Sampling} for {Passive} {Internet} {Measurement}: {A} {Review}",
volume = "19",
year = "2004"
}

@article{sklar_netlogo_2014,
	title = {{NetLogo}, a multi-agent simulation environment},
	volume = {13},
	abstract = {PubMed comprises more than 23 million citations for biomedical literature from MEDLINE, life science journals, and online books. Citations may include link...},
	number = {3},
	urldate = {2018-04-14},
	journal = {Artificial Life},
	author = {Sklar, E},
	year = {2014},
	pages = {303--311},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\T8VWASKL\\s.html:text/html}
}

@inproceedings{tisue_netlogo:_2004,
	title = {{NetLogo}: {A} simple environment for modeling complexity},
	shorttitle = {{NetLogo}},
	abstract = {NetLogo (Wilensky, 1999) is a multi-agent programming language and development environment for modeling complex systems. It is designed for both education ...},
	urldate = {2018-04-14},
	booktitle = {International {Conference} on {Complex} {Systems}},
	author = {Tisue, Seth and Wilensky, Uri},
	year = {2004},
	pages = {16--21},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\6ZQY4QBL\\s.html:text/html}
}

@inproceedings{tisue_netlogo:_2004-1,
	title = {{NetLogo}: {Design} and implementation of a multi-agent modeling environment},
	shorttitle = {{NetLogo}},
	abstract = {CiteSeerX - Document Details (Isaac Councill, Lee Giles): null},
	urldate = {2018-04-14},
	booktitle = {Agent},
	author = {Tisue, S.},
	year = {2004},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\HQF5EU78\\s.html:text/html}
}

@article{thiele_netlogo_2010,
	title = {{NetLogo} meets {R}: {Linking} agent-based models with a toolbox for their analysis},
	volume = {25},
	shorttitle = {{NetLogo} meets {R}},
	doi = {10.1016/j.envsoft.2010.02.008},
	abstract = {NetLogo is a software platform for agent-based modelling that is increasingly used in ecological and environmental modelling. So far, for comprehensive ana...},
	number = {8},
	urldate = {2018-04-14},
	journal = {Environmental Modelling \& Software},
	author = {Thiele, Jan C and Grimm, Volker},
	year = {2010},
	pages = {972--974},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\RACNQX7J\\s.html:text/html}
}

@misc{wilensky_netlogo_1999,
	title = {{NetLogo} [{Computer} {Program}]: {Center} for {Connected} {Learning} and {Computer}-{Based} {Modeling}},
	shorttitle = {{NetLogo} [{Computer} {Program}]},
	abstract = {NetLogo [Computer Program]: Center for Connected Learning and Computer-Based ModelingU. Wilensky...},
	urldate = {2018-04-14},
	author = {Wilensky, U.},
	year = {1999},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\S9ZYNHZ4\\s.html:text/html}
}

@article{kahn_introduction_2015,
	title = {An {Introduction} to {Agent}-{Based} {Modeling}: {Modeling} {Natural}, {Social}, and {Engineered} {Complex} {Systems} with {NetLogo}},
	volume = {68},
	shorttitle = {An {Introduction} to {Agent}-{Based} {Modeling}},
	abstract = {Scitation is the online home of leading journals and conference proceedings from AIP Publishing and AIP Member Societies},
	number = {8},
	urldate = {2018-04-14},
	journal = {Physics Today},
	author = {Kahn, Ken},
	year = {2015},
	pages = {55--55},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\V8JHGTKL\\s.html:text/html}
}

@article{thiele_facilitating_2014,
	title = {Facilitating {Parameter} {Estimation} and {Sensitivity} {Analysis} of {Agent}-{Based} {Models}: {A} {Cookbook} {Using} {NetLogo} and '{R}'},
	volume = {17},
	shorttitle = {Facilitating {Parameter} {Estimation} and {Sensitivity} {Analysis} of {Agent}-{Based} {Models}},
	abstract = {Agent-based models are increasingly used to address questions regarding real-world phenomena and mechanisms; therefore, the calibration of model parameters...},
	number = {3},
	urldate = {2018-04-14},
	journal = {Journal of Artificial Societies \& Social Simulation},
	author = {Thiele, Jan C and Kurth, Winfried and Grimm, Volker},
	year = {2014},
	pages = {11},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\4BJZEYUC\\s.html:text/html}
}

@inproceedings{palkar_e2:_2015,
	title = {E2: a framework for {NFV} applications},
	shorttitle = {E2},
	abstract = {By moving network appliance functionality from proprietary hardware to software, Network Function Virtualization promises to bring the advantages of cloud computing to network packet processing. However, the evolution of cloud computing ...},
	urldate = {2018-04-14},
	booktitle = {Symposium on {Operating} {Systems} {Principles}},
	author = {Palkar, Shoumik and Lan, Chang and Han, Sangjin and Jang, Keon and Panda, Aurojit and Ratnasamy, Sylvia and Rizzo, Luigi and Shenker, Scott},
	year = {2015},
	pages = {121--136},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\9C5XXZ8P\\s.html:text/html}
}

@book{willingham_why_2009,
	address = {San Francisco, CA},
	edition = {1st ed},
	title = {Why don't students like school? a cognitive scientist answers questions about how the mind works and what it means for the classroom},
	isbn = {978-0-470-27930-4 978-0-470-59196-3},
	shorttitle = {Why don't students like school?},
	abstract = {Cognitive scientist Dan Willingham has focused his acclaimed research on the biological and cognitive basis of learning and has a deep understanding of the daily challenges faced by classroom teachers. This book will help teachers improve their practice by explaining how they and their students think and learn revealing the importance of story, emotion, memory, context, and routine in building knowledge and creating lasting learning experiences. --from publisher description},
	publisher = {Jossey-Bass},
	author = {Willingham, Daniel T.},
	year = {2009},
	note = {OCLC: ocn255894389},
	keywords = {Effective teaching, Learning, Psychology of}
}

@techreport{phaal_inmon_2001,
	title = {{InMon} {Corporation}'s {sFlow}: {A} {Method} for {Monitoring} {Traffic} in {Switched} and {Routed} {Networks}},
	shorttitle = {{InMon} {Corporation}'s {sFlow}},
	abstract = {This memo defines InMon Coporation’s sFlow system. sFlow is a technology for monitoring traffic in data networks containing switches and routers. In particular, it defines the sampling mechanisms implemented in an sFlow Agent for monitoring traffic, the sFlow MIB for controlling the sFlow Agent, and the format of sample data used by the sFlow Agent when forwarding data to a central data collector.},
	language = {en},
	number = {RFC3176},
	urldate = {2018-04-14},
	institution = {RFC Editor},
	author = {Phaal, P. and Panchen, S. and McKee, N.},
	month = sep,
	year = {2001},
	doi = {10.17487/rfc3176},
	file = {Phaal 等。 - 2001 - InMon Corporation's sFlow A Method for Monitoring.pdf:C\:\\Users\\zzy\\Zotero\\storage\\ANW638ZS\\Phaal 等。 - 2001 - InMon Corporation's sFlow A Method for Monitoring.pdf:application/pdf}
}

@inproceedings{yuan_quantitative_2017,
	title = {Quantitative {Network} {Monitoring} with {NetQRE}},
	isbn = {978-1-4503-4653-5},
	doi = {10.1145/3098822.3098830},
	abstract = {In network management today, dynamic updates are required for traffic engineering and for timely response to security threats. Decisions for such updates are based on monitoring network traffic to compute numerical quantities based on a variety of network and application-level performance metrics. Today’s state-of-the-art tools lack programming abstractions that capture application or session-layer semantics, and thus require network operators to specify and reason about complex state machines and interactions across layers. To address this limitation, we present the design and implementation of NetQRE, a high-level declarative toolkit that aims to simplify the specification and implementation of such quantitative network policies. NetQRE integrates regular-expression-like pattern matching at flow-level as well as application-level payloads with aggregation operations such as sum and average counts. We describe a compiler for NetQRE that automatically generates an efficient implementation with low memory footprint. Our evaluation results demonstrate that NetQRE allows natural specification of a wide range of quantitative network tasks ranging from detecting security attacks to enforcing application-layer network management policies. NetQRE results in high performance that is comparable with optimized manually-written low-level code and is significantly more efficient than alternative solutions, and can provide timely enforcement of network policies that require quantitative network monitoring.},
	language = {en},
	urldate = {2018-04-14},
	publisher = {ACM Press},
	author = {Yuan, Yifei and Lin, Dong and Mishra, Ankit and Marwaha, Sajal and Alur, Rajeev and Loo, Boon Thau},
	year = {2017},
	keywords = {NetQRE, network monitoring language, quantitative regular expression},
	pages = {99--112},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\YPC65PYE\\Yuan et al. - 2017 - Quantitative Network Monitoring with NetQRE.pdf:application/pdf;Yuan 等。 - 2017 - Quantitative Network Monitoring with NetQRE.pdf:C\:\\Users\\zzy\\Zotero\\storage\\AZKEPJBT\\Yuan 等。 - 2017 - Quantitative Network Monitoring with NetQRE.pdf:application/pdf}
}

@inproceedings{moshref_trumpet:_2016,
	title = {Trumpet: {Timely} and {Precise} {Triggers} in {Data} {Centers}},
	isbn = {978-1-4503-4193-6},
	shorttitle = {Trumpet},
	doi = {10.1145/2934872.2934879},
	abstract = {As data centers grow larger and strive to provide tight performance and availability SLAs, their monitoring infrastructure must move from passive systems that provide aggregated inputs to human operators, to active systems that enable programmed control. In this paper, we propose Trumpet, an event monitoring system that leverages CPU resources and end-host programmability, to monitor every packet and report events at millisecond timescales. Trumpet users can express many network-wide events, and the system efﬁciently detects these events using triggers at end-hosts. Using careful design, Trumpet can evaluate triggers by inspecting every packet at full line rate even on future generations of NICs, scale to thousands of triggers per end-host while bounding packet processing delay to a few microseconds, and report events to a controller within 10 milliseconds, even in the presence of attacks. We demonstrate these properties using an implementation of Trumpet, and also show that it allows operators to describe new network events such as detecting correlated bursts and loss, identifying the root cause of transient congestion, and detecting short-term anomalies at the scale of a data center tenant.},
	language = {en},
	urldate = {2018-04-14},
	publisher = {ACM Press},
	author = {Moshref, Masoud and Yu, Minlan and Govindan, Ramesh and Vahdat, Amin},
	year = {2016},
	keywords = {End-host Monitoring, Network Event Monitoring},
	pages = {129--143},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\FTA2TF76\\Moshref et al. - 2016 - Trumpet Timely and Precise Triggers in Data Cente.pdf:application/pdf;Moshref 等。 - 2016 - Trumpet Timely and Precise Triggers in Data Cente.pdf:C\:\\Users\\zzy\\Zotero\\storage\\6GKGYEYN\\Moshref 等。 - 2016 - Trumpet Timely and Precise Triggers in Data Cente.pdf:application/pdf}
}

@inproceedings{liu_one_2016,
	title = {One {Sketch} to {Rule} {Them} {All}: {Rethinking} {Network} {Flow} {Monitoring} with {UnivMon}},
	isbn = {978-1-4503-4193-6},
	shorttitle = {One {Sketch} to {Rule} {Them} {All}},
	doi = {10.1145/2934872.2934906},
	abstract = {Network management requires accurate estimates of metrics for many applications including trafﬁc engineering (e.g., heavy hitters), anomaly detection (e.g., entropy of source addresses), and security (e.g., DDoS detection). Obtaining accurate estimates given router CPU and memory constraints is a challenging problem. Existing approaches fall in one of two undesirable extremes: (1) low ﬁdelity generalpurpose approaches such as sampling, or (2) high ﬁdelity but complex algorithms customized to speciﬁc applicationlevel metrics. Ideally, a solution should be both general (i.e., supports many applications) and provide accuracy comparable to custom algorithms. This paper presents UnivMon, a framework for ﬂow monitoring which leverages recent theoretical advances and demonstrates that it is possible to achieve both generality and high accuracy. UnivMon uses an application-agnostic data plane monitoring primitive; different (and possibly unforeseen) estimation algorithms run in the control plane, and use the statistics from the data plane to compute application-level metrics. We present a proofof-concept implementation of UnivMon using P4 and develop simple coordination techniques to provide a “one-bigswitch” abstraction for network-wide monitoring. We evaluate the effectiveness of UnivMon using a range of tracedriven evaluations and show that it offers comparable (and sometimes better) accuracy relative to custom sketching solutions across a range of monitoring tasks.},
	language = {en},
	urldate = {2018-04-14},
	booktitle = {Proceedings of the 2016 {ACM} {SIGCOMM} {Conference}},
	author = {Liu, Zaoxing and Manousis, Antonis and Vorsanger, Gregory and Sekar, Vyas and Braverman, Vladimir},
	year = {2016},
	keywords = {network-wide traffic measurement, Flow Monitoring, sketch and streaming algorithm},
	pages = {101--114},
	file = {Liu 等。 - 2016 - One Sketch to Rule Them All Rethinking Network Fl.pdf:C\:\\Users\\zzy\\Zotero\\storage\\YXBUFUVR\\Liu 等。 - 2016 - One Sketch to Rule Them All Rethinking Network Fl.pdf:application/pdf}
}

@inproceedings{liu_mozart:_2016,
	title = {{MOZART}: {Temporal} {Coordination} of {Measurement}},
	isbn = {978-1-4503-4211-7},
	shorttitle = {{MOZART}},
	doi = {10.1145/2890955.2890964},
	abstract = {In data center and ISP networks, many monitoring tasks are not at a single network device and require coordination across many devices. Because network devices have different views of trafﬁc and different capabilities of monitoring trafﬁc properties, it is useful for one device to tell another one which ﬂows to monitor at which time, rather than monitoring all the ﬂows all the time. In this paper, we present MOZART (MOnitor ﬂowZ At the Right Time), which enables temporal coordination across network devices. MOZART includes two key components: the selectors which capture network events and select related ﬂows, and the monitors which collect ﬂow-level statistics of the selected ﬂows. We design temporal coordination algorithms and mechanisms across selectors and monitors to maximize the monitoring accuracy while staying within memory constraints. We also optimize the placement of selectors and monitors to support the maximum number of monitoring tasks. We implement MOZART in an Open vSwitch-based testbed and run extensive experiments with real trafﬁc traces. Our results show a reduction of the false negative ratio from 15\% to 1\% compared to the existing method without coordination.},
	language = {en},
	urldate = {2018-04-14},
	publisher = {ACM Press},
	author = {Liu, Xuemei and Shirazipour, Meral and Yu, Minlan and Zhang, Ying},
	year = {2016},
	keywords = {network-wide traffic measurement},
	pages = {1--12},
	file = {Liu 等。 - 2016 - MOZART Temporal Coordination of Measurement.pdf:C\:\\Users\\zzy\\Zotero\\storage\\SFSYLALD\\Liu 等。 - 2016 - MOZART Temporal Coordination of Measurement.pdf:application/pdf}
}

@inproceedings{moshref_scream:_2015,
	series = {{CoNEXT}'15},
	title = {{SCREAM}: {Sketch} {Resource} {Allocation} for {Software}-{Defined} {Measurement}},
	isbn = {978-1-4503-3412-9},
	shorttitle = {{SCREAM}},
	doi = {10.1145/2716281.2836099},
	abstract = {Software-deﬁned networks can enable a variety of concurrent, dynamically instantiated, measurement tasks, that provide ﬁne-grain visibility into network trafﬁc. Recently, there have been many proposals for using sketches for network measurement. However, sketches in hardware switches use constrained resources such as SRAM memory, and the accuracy of measurement tasks is a function of the resources devoted to them on each switch. This paper presents SCREAM, a system for allocating resources to sketch-based measurement tasks that ensures a user-speciﬁed minimum accuracy. SCREAM estimates the instantaneous accuracy of tasks so as to dynamically adapt the allocated resources for each task. Thus, by ﬁnding the right amount of resources for each task on each switch and correctly merging sketches at the controller, SCREAM can multiplex resources among networkwide measurement tasks. Simulations with three measurement tasks (heavy hitter, hierarchical heavy hitter, and super source/destination detection) show that SCREAM can support more measurement tasks with higher accuracy than existing approaches.},
	language = {en},
	urldate = {2018-04-14},
	booktitle = {Proceedings of the 11th {ACM} {Conference} on {Emerging} {Networking} {Experiments} and {Technologies}},
	author = {Moshref, Masoud and Yu, Minlan and Govindan, Ramesh and Vahdat, Amin},
	year = {2015},
	keywords = {network-wide traffic measurement, resource allocation, software defined measurement, sketch and streaming algorithm},
	pages = {1--13},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\WXD3JSP2\\Moshref et al. - 2015 - SCREAM Sketch Resource Allocation for Software-de.pdf:application/pdf;Moshref 等。 - 2015 - SCREAM sketch resource allocation for software-de.pdf:C\:\\Users\\zzy\\Zotero\\storage\\SUR6V4B4\\Moshref 等。 - 2015 - SCREAM sketch resource allocation for software-de.pdf:application/pdf}
}

@inproceedings{zhu_packet-level_2015,
	title = {Packet-{Level} {Telemetry} in {Large} {Datacenter} {Networks}},
	isbn = {978-1-4503-3542-3},
	doi = {10.1145/2785956.2787483},
	abstract = {Debugging faults in complex networks often requires capturing and analyzing trafﬁc at the packet level. In this task, datacenter networks (DCNs) present unique challenges with their scale, trafﬁc volume, and diversity of faults. To troubleshoot faults in a timely manner, DCN administrators must a) identify affected packets inside large volume of trafﬁc; b) track them across multiple network components; c) analyze trafﬁc traces for fault patterns; and d) test or conﬁrm potential causes. To our knowledge, no tool today can achieve both the speciﬁcity and scale required for this task.},
	language = {en},
	urldate = {2018-04-14},
	publisher = {ACM Press},
	author = {Zhu, Yibo and Zhao, Ben Y. and Zheng, Haitao and Kang, Nanxi and Cao, Jiaxin and Greenberg, Albert and Lu, Guohan and Mahajan, Ratul and Maltz, Dave and Yuan, Lihua and Zhang, Ming},
	year = {2015},
	keywords = {datacenter network, failure detection, probe},
	pages = {479--491},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\VLQ5XT4L\\Zhu et al. - 2015 - Packet-Level Telemetry in Large Datacenter Network.pdf:application/pdf;Zhu 等。 - 2015 - Packet-Level Telemetry in Large Datacenter Network.pdf:C\:\\Users\\zzy\\Zotero\\storage\\3M5SE5YL\\Zhu 等。 - 2015 - Packet-Level Telemetry in Large Datacenter Network.pdf:application/pdf}
}

@inproceedings{biswas_large-scale_2015,
	title = {Large-scale {Measurements} of {Wireless} {Network} {Behavior}},
	isbn = {978-1-4503-3542-3},
	doi = {10.1145/2785956.2787489},
	abstract = {Meraki is a cloud-based network management system which provides centralized conﬁguration, monitoring, and network troubleshooting tools across hundreds of thousands of sites worldwide. As part of its architecture, the Meraki system has built a database of time-series measurements of wireless link, client, and application behavior for monitoring and debugging purposes. This paper studies an anonymized subset of measurements, containing data from approximately ten thousand radio access points, tens of thousands of links, and 5.6 million clients from one-week periods in January 2014 and January 2015 to provide a deeper understanding of realworld network behavior.},
	language = {en},
	urldate = {2018-04-14},
	publisher = {ACM Press},
	author = {Biswas, Sanjit and Bicket, John and Wong, Edmund and Musaloiu-E, Raluca and Bhartia, Apurv and Aguayo, Dan},
	year = {2015},
	keywords = {802.11, large-scale measurements, network usage data},
	pages = {153--165},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\R8STVWK3\\Biswas et al. - 2015 - Large-scale Measurements of Wireless Network Behav.pdf:application/pdf;Biswas 等。 - 2015 - Large-scale Measurements of Wireless Network Behav.pdf:C\:\\Users\\zzy\\Zotero\\storage\\3N6NEFMY\\Biswas 等。 - 2015 - Large-scale Measurements of Wireless Network Behav.pdf:application/pdf}
}

@inproceedings{burnett_encore:_2015,
	title = {Encore: {Lightweight} {Measurement} of {Web} {Censorship} with {Cross}-{Origin} {Requests}},
	isbn = {978-1-4503-3542-3},
	shorttitle = {Encore},
	doi = {10.1145/2785956.2787485},
	language = {en},
	urldate = {2018-04-14},
	publisher = {ACM Press},
	author = {Burnett, Sam and Feamster, Nick},
	year = {2015},
	keywords = {network measurement, web censorship, web security},
	pages = {653--667},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\HYTLWJVX\\Burnett and Feamster - 2015 - Encore Lightweight Measurement of Web Censorship .pdf:application/pdf;Burnett 和 Feamster - 2015 - Encore Lightweight Measurement of Web Censorship .pdf:C\:\\Users\\zzy\\Zotero\\storage\\JPFCQ3Z8\\Burnett 和 Feamster - 2015 - Encore Lightweight Measurement of Web Censorship .pdf:application/pdf}
}

@inproceedings{czyz_measuring_2014,
	title = {Measuring {IPv6} adoption},
	isbn = {978-1-4503-2836-4},
	doi = {10.1145/2619239.2626295},
	abstract = {After several IPv4 address exhaustion milestones in the last three years, it is becoming apparent that the world is running out of IPv4 addresses, and the adoption of the next generation Internet protocol, IPv6, though nascent, is accelerating. In order to better understand this unique and disruptive transition, we explore twelve metrics using ten global-scale datasets to create the longest and broadest measurement of IPv6 adoption to date. Using this perspective, we ﬁnd that adoption, relative to IPv4, varies by two orders of magnitude depending on the measure examined and that care must be taken when evaluating adoption metrics in isolation. Further, we ﬁnd that regional adoption is not uniform. Finally, and perhaps most surprisingly, we ﬁnd that over the last three years, the nature of IPv6 utilization—in terms of trafﬁc, content, reliance on transition technology, and performance—has shifted dramatically from prior ﬁndings, indicating a maturing of the protocol into production mode. We believe IPv6’s recent growth and this changing utilization signal a true quantum leap.},
	language = {en},
	urldate = {2018-04-14},
	publisher = {ACM Press},
	author = {Czyz, Jakub and Allman, Mark and Zhang, Jing and Iekel-Johnson, Scott and Osterweil, Eric and Bailey, Michael},
	year = {2014},
	keywords = {dns, measurement, internet, IP, IPv4, IPv6},
	pages = {87--98},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\7NUCQ2DW\\Czyz et al. - 2014 - Measuring IPv6 Adoption.pdf:application/pdf;Czyz 等。 - 2014 - Measuring IPv6 adoption.pdf:C\:\\Users\\zzy\\Zotero\\storage\\GQGIT85M\\Czyz 等。 - 2014 - Measuring IPv6 adoption.pdf:application/pdf}
}

@book{international_conference_on_mobile_systems_applications_and_services_proceedings_2003,
	address = {Berkeley, Calif},
	title = {Proceedings of {MobiSys} 2003, the {First} {International} {Conference} on {Mobile} {Systems}, {Applications}, and {Services}: {May} 5 - 8, 2003, {San} {Francisco}, {CA}, {USA}},
	isbn = {978-1-931971-09-6},
	shorttitle = {Proceedings of {MobiSys} 2003, the {First} {International} {Conference} on {Mobile} {Systems}, {Applications}, and {Services}},
	abstract = {The complexity of networks has outpaced our tools to debug them; today, administrators use manual tools to diagnose problems. In this paper, we show how packet histories—the full stories of every packet’s journey through the network—can simplify network diagnosis. To demonstrate the usefulness of packet histories and the practical feasibility of constructing them, we built NetSight, an extensible platform that captures packet histories and enables applications to concisely and ﬂexibly retrieve packet histories of interest. Atop NetSight, we built four applications that illustrate its ﬂexibility: an interactive network debugger, a live invariant monitor, a path-aware history logger, and a hierarchical network proﬁler. On a single modern multi-core server, NetSight can process packet histories for the trafﬁc of multiple 10 Gb/s links. For larger networks, NetSight scales linearly with additional servers and scales even further with straightforward additions to hardware- and hypervisor-based switches.},
	language = {en},
	publisher = {USENIX Association},
	editor = {{International Conference on Mobile Systems, Applications, and Services} and {Association for Computing Machinery} and {USENIX Association}},
	year = {2003},
	note = {OCLC: 249708838},
	file = {International Conference on Mobile Systems, Applications, and Services 等。 - 2003 - Proceedings of MobiSys 2003, the First Internation.pdf:C\:\\Users\\zzy\\Zotero\\storage\\C4XQ9CS4\\International Conference on Mobile Systems, Applications, and Services 等。 - 2003 - Proceedings of MobiSys 2003, the First Internation.pdf:application/pdf;International Conference on Mobile Systems, Applications, and Services et al. - 2003 - Proceedings of MobiSys 2003, the First Internation.pdf:C\:\\Users\\zzy\\Zotero\\storage\\C5874RP8\\International Conference on Mobile Systems, Applications, and Services et al. - 2003 - Proceedings of MobiSys 2003, the First Internation.pdf:application/pdf}
}

@inproceedings{moshref_dream:_2014,
	title = {{DREAM}: dynamic resource allocation for software-defined measurement},
	isbn = {978-1-4503-2836-4},
	shorttitle = {{DREAM}},
	doi = {10.1145/2619239.2626291},
	abstract = {Software-deﬁned networks can enable a variety of concurrent, dynamically instantiated, measurement tasks, that provide ﬁne-grain visibility into network trafﬁc. Recently, there have been many proposals to conﬁgure TCAM counters in hardware switches to monitor trafﬁc. However, the TCAM memory at switches is fundamentally limited and the accuracy of the measurement tasks is a function of the resources devoted to them on each switch. This paper describes an adaptive measurement framework, called DREAM, that dynamically adjusts the resources devoted to each measurement task, while ensuring a user-speciﬁed level of accuracy. Since the trade-off between resource usage and accuracy can depend upon the type of tasks, their parameters, and trafﬁc characteristics, DREAM does not assume an a priori characterization of this trade-off, but instead dynamically searches for a resource allocation that is sufﬁcient to achieve a desired level of accuracy. A prototype implementation and simulations with three network-wide measurement tasks (heavy hitter, hierarchical heavy hitter and change detection) and diverse trafﬁc show that DREAM can support more concurrent tasks with higher accuracy than several other alternatives.},
	language = {en},
	urldate = {2018-04-14},
	publisher = {ACM Press},
	author = {Moshref, Masoud and Yu, Minlan and Govindan, Ramesh and Vahdat, Amin},
	year = {2014},
	keywords = {network-wide traffic measurement, resource allocation, software defined measurement},
	pages = {419--430},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\2U6GVKSE\\Moshref et al. - 2014 - DREAM Dynamic Resource Allocation for Software-de.pdf:application/pdf;Moshref 等。 - 2014 - DREAM dynamic resource allocation for software-de.pdf:C\:\\Users\\zzy\\Zotero\\storage\\VK8W4RT9\\Moshref 等。 - 2014 - DREAM dynamic resource allocation for software-de.pdf:application/pdf}
}

@inproceedings{kang_optimizing_2013,
	title = {Optimizing the "one big switch" abstraction in software-defined networks},
	isbn = {978-1-4503-2101-3},
	doi = {10.1145/2535372.2535373},
	abstract = {Software Deﬁned Networks (SDNs) support diverse network policies by oﬀering direct, network-wide control over how switches handle traﬃc. Unfortunately, many controller platforms force applications to grapple simultaneously with end-to-end connectivity constraints, routing policy, switch memory limits, and the hop-by-hop interactions between forwarding rules. We believe solutions to this complex problem should be factored in to three distinct parts: (1) high-level SDN applications should deﬁne their end-point connectivity policy on top of a “one big switch” abstraction; (2) a midlevel SDN infrastructure layer should decide on the hop-byhop routing policy; and (3) a compiler should synthesize an eﬀective set of forwarding rules that obey the user-deﬁned policies and adhere to the resource constraints of the underlying hardware. In this paper, we deﬁne and implement our proposed architecture, present eﬃcient rule-placement algorithms that distribute forwarding policies across general SDN networks while managing rule-space constraints, and show how to support dynamic, incremental update of policies. We evaluate the eﬀectiveness of our algorithms analytically by providing complexity bounds on their running time and rule space, as well as empirically, using both synthetic benchmarks, and real-world ﬁrewall and routing policies.},
	language = {en},
	urldate = {2018-04-14},
	publisher = {ACM Press},
	author = {Kang, Nanxi and Liu, Zhenming and Rexford, Jennifer and Walker, David},
	year = {2013},
	pages = {13--24},
	file = {2013 - Optimizing the one big switch abstraction in sof.pdf:C\:\\Users\\zzy\\Zotero\\storage\\9WB5U7GI\\Kang 等。 - 2013 - Optimizing the one big switch abstraction in sof.pdf:application/pdf;p13-kang.pdf:C\:\\Users\\zzy\\Zotero\\storage\\83HDYJHQ\\p13-kang.pdf:application/pdf}
}

@article{schweller_reversible_2007,
	title = {Reversible {Sketches}: {Enabling} {Monitoring} and {Analysis} {Over} {High}-{Speed} {Data} {Streams}},
	volume = {15},
	issn = {1063-6692},
	shorttitle = {Reversible {Sketches}},
	doi = {10.1109/TNET.2007.896150},
	abstract = {A key function for network traffic monitoring and analysis is the ability to perform aggregate queries over multiple data streams. Change detection is an important primitive which can be extended to construct many aggregate queries. The recently proposed sketches are among the very few that can detect heavy changes online for high speed links, and thus support various aggregate queries in both temporal and spatial domains. However, it does not preserve the keys (e.g., source IP address) of flows, making it difficult to reconstruct the desired set of anomalous keys.},
	language = {en},
	number = {5},
	urldate = {2018-04-14},
	journal = {IEEE/ACM Transactions on Networking},
	author = {Schweller, Robert and Li, Zhichun and Chen, Yan and Gao, Yan and Gupta, Ashish and Zhang, Yin and Dinda, Peter A. and Kao, Ming-Yang and Memik, Gokhan},
	month = oct,
	year = {2007},
	keywords = {sketch and streaming algorithm},
	pages = {1059--1072},
	file = {Schweller 等。 - 2007 - Reversible Sketches Enabling Monitoring and Analy.pdf:C\:\\Users\\zzy\\Zotero\\storage\\4H89QP7M\\Schweller 等。 - 2007 - Reversible Sketches Enabling Monitoring and Analy.pdf:application/pdf}
}

@article{sharma_estimating_2006,
	title = {Estimating {Network} {Proximity} and {Latency}},
	volume = {36},
	abstract = {Network proximity and latency estimation is an important component in discovering and locating services and applications. With the growing number of services and service providers in the large-scale Internet, accurately estimating network proximity/latency with minimal probing overhead becomes essential for scalable deployment. Although there exist a number of network distance estimation schemes, they either rely on extensive infrastructure support, require the IP address of the potential targets, falsely cluster distant nodes, or perform poorly with even few measurement errors. We propose Netvigator, a scalable network proximity and latency estimation tool that uses information obtained from probing a small number of landmark nodes and intermediate routers (termed milestones) that are discovered en route to the landmarks, to identify the closest nodes. With very little additional probing overhead, Netvigator uses distance information to the milestones to accurately locate the closest nodes. We developed a Netvigator prototype and report our performance evaluation on PlanetLab and in the intranet of a large enterprise. Netvigator is a running service on PlanetLab as a part of HP Labs’ S3 (Scalable Sensing Service).},
	language = {en},
	number = {3},
	journal = {ACM SIGCOMM Computer Communication Review},
	author = {Sharma, Puneet and Xu, Zhichen and Banerjee, Sujata and Lee, Sung-Ju and Labs, Hewlett-Packard},
	year = {2006},
	pages = {39--50},
	file = {Sharma 等。 - 2006 - Estimating Network Proximity and Latency∗.pdf:C\:\\Users\\zzy\\Zotero\\storage\\SZ9N9GTS\\Sharma 等。 - 2006 - Estimating Network Proximity and Latency∗.pdf:application/pdf}
}

@article{charikar_finding_2004,
	title = {Finding frequent items in data streams},
	volume = {312},
	issn = {03043975},
	doi = {10.1016/S0304-3975(03)00400-6},
	abstract = {We present a 1-pass algorithm for estimating the most frequent items in a data stream using limited storage space. Our method relies on a data structure called a COUNT SKETCH, which allows us to reliably estimate the frequencies of frequent items in the stream. Our algorithm achieves better space bounds than the previously known best algorithms for this problem for several natural distributions on the item frequencies. In addition, our algorithm leads directly to a 2-pass algorithm for the problem of estimating the items with the largest (absolute) change in frequency between two data streams. To our knowledge, this latter problem has not been previously studied in the literature.},
	language = {en},
	number = {1},
	urldate = {2018-04-14},
	journal = {Theoretical Computer Science},
	author = {Charikar, Moses and Chen, Kevin and Farach-Colton, Martin},
	month = jan,
	year = {2004},
	pages = {3--15},
	file = {Charikar 等。 - 2004 - Finding frequent items in data streams.pdf:C\:\\Users\\zzy\\Zotero\\storage\\WA2YY4N4\\Charikar 等。 - 2004 - Finding frequent items in data streams.pdf:application/pdf}
}

@inproceedings{huang_sketchvisor:_2017,
	title = {{SketchVisor}: {Robust} {Network} {Measurement} for {Software} {Packet} {Processing}},
	isbn = {978-1-4503-4653-5},
	shorttitle = {{SketchVisor}},
	doi = {10.1145/3098822.3098831},
	abstract = {Network measurement remains a missing piece in today's software packet processing platforms. Sketches provide a promising building block for filling this void by monitoring every packet with fixed-size memory and bounded errors. However, our analysis shows that existing sketch-based measurement solutions suffer from severe performance drops under high traffic load. Although sketches are efficiently designed, applying them in network measurement inevitably incurs heavy computational overhead. We present SketchVisor, a robust network measurement framework for software packet processing. It augments sketch-based measurement in the data plane with a fast path, which is activated under high traffic load to provide high-performance local measurement with slight accuracy degradations. It further recovers accurate network-wide measurement results via compressive sensing. We have built a SketchVisor prototype on top of Open vSwitch. Extensive testbed experiments show that SketchVisor achieves high throughput and high accuracy for a wide range of network measurement tasks and microbenchmarks.},
	urldate = {2018-04-14},
	booktitle = {Proceedings of the {Conference} of the {ACM} {Special} {Interest} {Group} on {Data} {Communication}},
	author = {Huang, Qun and Jin, Xin and Lee, Patrick P. C. and Li, Runhui and Tang, Lu and Chen, Yi-Chao and Zhang, Gong},
	year = {2017},
	keywords = {Software packet processing, network-wide traffic measurement, sketch and streaming algorithm},
	pages = {113--126},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\K49HH3WB\\Huang 等。 - 2017 - SketchVisor Robust Network Measurement for Softwa.pdf:application/pdf}
}

@inproceedings{bhasin_developing_2004,
	title = {Developing {Architectures} and {Technologies} for an {Evolvable} {NASA} {Space} {Communication} {Infrastructure}},
	isbn = {978-1-62410-024-6},
	doi = {10.2514/6.2004-3253},
	language = {en},
	urldate = {2018-04-15},
	publisher = {American Institute of Aeronautics and Astronautics},
	author = {Bhasin, Kul and Hayden, Jeffrey},
	month = may,
	year = {2004},
	file = {Bhasin and Hayden - 2004 - Developing Architectures and Technologies for an E.pdf:C\:\\Users\\zzy\\Zotero\\storage\\WX9WMEGG\\Bhasin and Hayden - 2004 - Developing Architectures and Technologies for an E.pdf:application/pdf}
}

@inproceedings{xue_omware:_2013,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '13},
	title = {{OMware}: {An} {Open} {Measurement} {Ware} for {Stable} {Residential} {Broadband} {Measurement}},
	isbn = {978-1-4503-2056-6},
	shorttitle = {{OMware}},
	doi = {10.1145/2486001.2491702},
	abstract = {A number of home-installed middleboxes, e.g., BISMark and SamKnows, and web-based tools, e.g., Netalyzr and Ookla's speedtest service, have been developed recently to enable residential broadband users to gauge their network service quality. One challenge to designing these systems is to provide stable network measurement. That is, the measurement results will not be fluctuated by sporadic overheads incurred inside the middlebox or web browser. In this poster, we propose a network measurement ware, OMware, to increase the stability of residential broadband measurement. The key feature is to implement the send and receive functions for measurement packets in the kernel. Our preliminary evaluation for an OpenWrt implementation shows that OMware provides very stable throughput and delay measurement, compared with typical socket-based measurement at the user level.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2013 {Conference} on {SIGCOMM}},
	publisher = {ACM},
	author = {Xue, Lei and Mok, Ricky K. P. and Chang, Rocky K. C.},
	year = {2013},
	keywords = {high performance, network measurement, openwrt kernel module},
	pages = {497--498},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\KW4UFTKL\\Xue et al. - 2013 - OMware An Open Measurement Ware for Stable Reside.pdf:application/pdf;ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\MUE8C978\\Xue et al. - 2013 - OMware An Open Measurement Ware for Stable Reside.pdf:application/pdf}
}

@inproceedings{chen_provider-side_2013,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '13},
	title = {A {Provider}-side {View} of {Web} {Search} {Response} {Time}},
	isbn = {978-1-4503-2056-6},
	doi = {10.1145/2486001.2486035},
	abstract = {Using a large Web search service as a case study, we highlight the challenges that modern Web services face in understanding and diagnosing the response time experienced by users. We show that search response time (SRT) varies widely over time and also exhibits counter-intuitive behavior. It is actually higher during off-peak hours, when the query load is lower, than during peak hours. To resolve this paradox and explain SRT variations in general, we develop an analysis framework that separates systemic variations due to periodic changes in service usage and anomalous variations due to unanticipated events such as failures and denial-of-service attacks. We find that systemic SRT variations are primarily caused by systemic changes in aggregate network characteristics, nature of user queries, and browser types. For instance, one reason for higher SRTs during off-peak hours is that during those hours a greater fraction of queries come from slower, mainly-residential networks. We also develop a technique that, by factoring out the impact of such variations, robustly detects and diagnoses performance anomalies in SRT. Deployment experience shows that our technique detects three times more true (operator-verified) anomalies than existing techniques.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2013 {Conference} on {SIGCOMM}},
	publisher = {ACM},
	author = {Chen, Yingying and Mahajan, Ratul and Sridharan, Baskar and Zhang, Zhi-Li},
	year = {2013},
	keywords = {anomaly detection and diagnosis, performance monitoring, search response time, web services},
	pages = {243--254},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\4SPMZW5V\\Chen et al. - 2013 - A Provider-side View of Web Search Response Time.pdf:application/pdf}
}

@inproceedings{gao_empirical_2013,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '13},
	title = {An {Empirical} {Reexamination} of {Global} {DNS} {Behavior}},
	isbn = {978-1-4503-2056-6},
	doi = {10.1145/2486001.2486018},
	abstract = {The performance and operational characteristics of the DNS protocol are of deep interest to the research and network operations community. In this paper, we present measurement results from a unique dataset containing more than 26 billion DNS query-response pairs collected from more than 600 globally distributed recursive DNS resolvers. We use this dataset to reaffirm findings in published work and notice some significant differences that could be attributed both to the evolving nature of DNS traffic and to our differing perspective. For example, we find that although characteristics of DNS traffic vary greatly across networks, the resolvers within an organization tend to exhibit similar behavior. We further find that more than 50\% of DNS queries issued to root servers do not return successful answers, and that the primary cause of lookup failures at root servers is malformed queries with invalid TLDs. Furthermore, we propose a novel approach that detects malicious domain groups using temporal correlation in DNS queries. Our approach requires no comprehensive labeled training set, which can be difficult to build in practice. Instead, it uses a known malicious domain as anchor, and identifies the set of previously unknown malicious domains that are related to the anchor domain. Experimental results illustrate the viability of this approach, i.e. , we attain a true positive rate of more than 96\%, and each malicious anchor domain results in a malware domain group with more than 53 previously unknown malicious domains on average.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2013 {Conference} on {SIGCOMM}},
	publisher = {ACM},
	author = {Gao, Hongyu and Yegneswaran, Vinod and Chen, Yan and Porras, Phillip and Ghosh, Shalini and Jiang, Jian and Duan, Haixin},
	year = {2013},
	keywords = {dns, malicious domain detection, measurement},
	pages = {267--278},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\CEQSN4LI\\Gao et al. - 2013 - An Empirical Reexamination of Global DNS Behavior.pdf:application/pdf}
}

@inproceedings{sundaresan_broadband_2011,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '11},
	title = {Broadband {Internet} {Performance}: {A} {View} from the {Gateway}},
	isbn = {978-1-4503-0797-0},
	shorttitle = {Broadband {Internet} {Performance}},
	doi = {10.1145/2018436.2018452},
	abstract = {We present the first study of network access link performance measured directly from home gateway devices. Policymakers, ISPs, and users are increasingly interested in studying the performance of Internet access links. Because of many confounding factors in a home network or on end hosts, however, thoroughly understanding access network performance requires deploying measurement infrastructure in users' homes as gateway devices. In conjunction with the Federal Communication Commission's study of broadband Internet access in the United States, we study the throughput and latency of network access links using longitudinal measurements from nearly 4,000 gateway devices across 8 ISPs from a deployment of over 4,200 devices. We study the performance users achieve and how various factors ranging from the user's choice of modem to the ISP's traffic shaping policies can affect performance. Our study yields many important findings about the characteristics of existing access networks. Our findings also provide insights into the ways that access network performance should be measured and presented to users, which can help inform ongoing broader efforts to benchmark the performance of access networks.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2011 {Conference}},
	publisher = {ACM},
	author = {Sundaresan, Srikanth and de Donato, Walter and Feamster, Nick and Teixeira, Renata and Crawford, Sam and Pescap{\`e}, Antonio},
	year = {2011},
	keywords = {access networks, benchmarking, bismark, broadband networks},
	pages = {134--145},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\PZZIMRFN\\Sundaresan et al. - 2011 - Broadband Internet Performance A View from the Ga.pdf:application/pdf}
}

@inproceedings{wang_untold_2011,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '11},
	title = {An {Untold} {Story} of {Middleboxes} in {Cellular} {Networks}},
	isbn = {978-1-4503-0797-0},
	doi = {10.1145/2018436.2018479},
	abstract = {The use of cellular data networks is increasingly popular as network coverage becomes more ubiquitous and many diverse user-contributed mobile applications become available. The growing cellular traffic demand means that cellular network carriers are facing greater challenges to provide users with good network performance and energy efficiency, while protecting networks from potential attacks. To better utilize their limited network resources while securing the network and protecting client devices the carriers have already deployed various network policies that influence traffic behavior. Today, these policies are mostly opaque, though they directly impact application designs and may even introduce network vulnerabilities. We present NetPiculet, the first tool that unveils carriers' NAT and firewall policies by conducting intelligent measurement. By running NetPiculet on the major U.S. cellular providers as well as deploying it as a smartphone application in the wild covering more than 100 cellular ISPs, we identified the key NAT and firewall policies which have direct implications on performance, energy, and security. For example, NAT boxes and firewalls set timeouts for idle TCP connections, which sometimes cause significant energy waste on mobile devices. Although most carriers today deploy sophisticated firewalls, they are still vulnerable to various attacks such as battery draining and denial of service. These findings can inform developers in optimizing the interaction between mobile applications and cellular networks and also guide carriers in improving their network configurations.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2011 {Conference}},
	publisher = {ACM},
	author = {Wang, Zhaoguang and Qian, Zhiyun and Xu, Qiang and Mao, Zhuoqing and Zhang, Ming},
	year = {2011},
	keywords = {cellular data network, firewall, middlebox, nat, tcp performance},
	pages = {374--385},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\A9ZXLI9Y\\Wang et al. - 2011 - An Untold Story of Middleboxes in Cellular Network.pdf:application/pdf}
}

@inproceedings{ager_anatomy_2012,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '12},
	title = {Anatomy of a {Large} {European} {IXP}},
	isbn = {978-1-4503-1419-0},
	doi = {10.1145/2342356.2342393},
	abstract = {The largest IXPs carry on a daily basis traffic volumes in the petabyte range, similar to what some of the largest global ISPs reportedly handle. This little-known fact is due to a few hundreds of member ASes exchanging traffic with one another over the IXP's infrastructure. This paper reports on a first-of-its-kind and in-depth analysis of one of the largest IXPs worldwide based on nine months' worth of sFlow records collected at that IXP in 2011. A main finding of our study is that the number of actual peering links at this single IXP exceeds the number of total AS links of the peer-peer type in the entire Internet known as of 2010! To explain such a surprisingly rich peering fabric, we examine in detail this IXP's ecosystem and highlight the diversity of networks that are members at this IXP and connect there with other member ASes for reasons that are similarly diverse, but can be partially inferred from their business types and observed traffic patterns. In the process, we investigate this IXP's traffic matrix and illustrate what its temporal and structural properties can tell us about the member ASes that generated the traffic in the first place. While our results suggest that these large IXPs can be viewed as a microcosm of the Internet ecosystem itself, they also argue for a re-assessment of the mental picture that our community has about this ecosystem.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2012 {Conference} on {Applications}, {Technologies}, {Architectures}, and {Protocols} for {Computer} {Communication}},
	publisher = {ACM},
	author = {Ager, Bernhard and Chatzis, Nikolaos and Feldmann, Anja and Sarrar, Nadi and Uhlig, Steve and Willinger, Walter},
	year = {2012},
	keywords = {internet exchange points, internet topology, traffic characterization},
	pages = {163--174},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\XCGXRFIF\\Ager et al. - 2012 - Anatomy of a Large European IXP.pdf:application/pdf}
}

@inproceedings{lall_data_2006,
	address = {New York, NY, USA},
	series = {{SIGMETRICS} '06/{Performance} '06},
	title = {Data {Streaming} {Algorithms} for {Estimating} {Entropy} of {Network} {Traffic}},
	isbn = {978-1-59593-319-5},
	doi = {10.1145/1140277.1140295},
	abstract = {Using entropy of traffic distributions has been shown to aid a wide variety of network monitoring applications such as anomaly detection, clustering to reveal interesting patterns, and traffic classification. However, realizing this potential benefit in practice requires accurate algorithms that can operate on high-speed links, with low CPU and memory requirements. In this paper, we investigate the problem of estimating the entropy in a streaming computation model. We give lower bounds for this problem, showing that neither approximation nor randomization alone will let us compute the entropy efficiently. We present two algorithms for randomly approximating the entropy in a time and space efficient manner, applicable for use on very high speed (greater than OC-48) links. The first algorithm for entropy estimation is inspired by the structural similarity with the seminal work of Alon et al. for estimating frequency moments, and we provide strong theoretical guarantees on the error and resource usage. Our second algorithm utilizes the observation that the performance of the streaming algorithm can be enhanced by separating the high-frequency items (or elephants) from the low-frequency items (or mice). We evaluate our algorithms on traffic traces from different deployment scenarios.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {Joint} {International} {Conference} on {Measurement} and {Modeling} of {Computer} {Systems}},
	publisher = {ACM},
	author = {Lall, Ashwin and Sekar, Vyas and Ogihara, Mitsunori and Xu, Jun and Zhang, Hui},
	year = {2006},
	keywords = {traffic analysis, sketch and streaming algorithm, entropy of information theory},
	pages = {145--156},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\WINXXXDN\\Lall et al. - 2006 - Data Streaming Algorithms for Estimating Entropy o.pdf:application/pdf}
}

@inproceedings{roy_characterizing_2013,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '13},
	title = {Characterizing {Correlated} {Latency} {Anomalies} in {Broadband} {Access} {Networks}},
	isbn = {978-1-4503-2056-6},
	doi = {10.1145/2486001.2491734},
	abstract = {The growing prevalence of broadband Internet access around the world has made understanding the performance and reliability of broadband access networks extremely important. To better understand the performance anomalies that arise in broadband access networks, we have deployed hundreds of routers in home broadband access networks around the world and are studying the performance of these networks. One of the performance pathologies that we have observed is correlated, sudden latency increases simultaneously and to multiple destinations. In this work, we provide a preliminary glimpse into these sudden latency increases and attempt to understand their causes. Although we do not isolate root cause in this study, observing the sets of destinations that experience correlated latency increases can provide important clues as to the locations in the network that may be inducing these pathologies. We present an algorithm to better identify the network locations that are likely responsible for these pathologies. We then analyze latency data from one month across our home router deployment to determine where in the network latency issues are arising, and how those pathologies differ across regions, ISPs, and countries. Our preliminary analysis suggests that most latency pathologies are to a single destination and a relatively small percentage of these pathologies are likely in the last mile, suggesting that peering within the network may be a more likely culprit for these pathologies than access link problems.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2013 {Conference} on {SIGCOMM}},
	publisher = {ACM},
	author = {Roy, Swati and Feamster, Nick},
	year = {2013},
	keywords = {measurement, active probing, performance},
	pages = {525--526},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\UFRMT8QP\\Roy and Feamster - 2013 - Characterizing Correlated Latency Anomalies in Bro.pdf:application/pdf}
}

@inproceedings{dischinger_detecting_2008,
	address = {New York, NY, USA},
	series = {{IMC} '08},
	title = {Detecting {BitTorrent} {Blocking}},
	isbn = {978-1-60558-334-1},
	doi = {10.1145/1452520.1452523},
	abstract = {Recently, it has been reported that certain access ISPs are surreptitiously blocking their customers from uploading data using the popular BitTorrent file-sharing protocol. The reports have sparked an intense and wide-ranging policy debate on network neutrality and ISP traffic management practices. However, to date, end users lack access to measurement tools that can detect whether their access ISPs are blocking their BitTorrent traffic. And since ISPs do not voluntarily disclose their traffic management policies, no one knows how widely BitTorrent traffic blocking is deployed in the current Internet. In this paper, we address this problem by designing an easy-to-use tool to detect BitTorrent blocking and by presenting results from a widely used public deployment of the tool.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the 8th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Dischinger, Marcel and Mislove, Alan and Haeberlen, Andreas and Gummadi, Krishna P.},
	year = {2008},
	keywords = {network measurement, bittorrent, blocking},
	pages = {3--8},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\Q8HTCHMZ\\Dischinger et al. - 2008 - Detecting Bittorrent Blocking.pdf:application/pdf;Dischinger 等。 - 2008 - Detecting bittorrent blocking.pdf:C\:\\Users\\zzy\\Zotero\\storage\\UFUGSVAI\\Dischinger 等。 - 2008 - Detecting bittorrent blocking.pdf:application/pdf}
}

@article{duffield_estimating_2005,
	title = {Estimating {Flow} {Distributions} from {Sampled} {Flow} {Statistics}},
	volume = {13},
	issn = {1063-6692},
	doi = {10.1109/TNET.2005.852874},
	abstract = {Passive traffic measurement increasingly employs sampling at the packet level. Many high-end routers form flow statistics from a sampled substream of packets. Sampling controls the consumption of resources by the measurement operations. However, knowledge of the statistics of flows in the unsampled stream remains useful, for understanding both characteristics of source traffic, and consumption of resources in the network. This paper provides methods that use flow statistics formed from sampled packet stream to infer the frequencies of the number of packets per flow in the unsampled stream. A key task is to infer the properties of flows of original traffic that evaded sampling altogether. We achieve this through statistical inference, and by exploiting protocol level detail reported in flow records. We investigate the impact on our results of different versions of packet sampling.},
	number = {5},
	urldate = {2018-04-15},
	journal = {IEEE/ACM Trans. Netw.},
	author = {Duffield, Nick and Lund, Carsten and Thorup, Mikkel},
	month = oct,
	year = {2005},
	keywords = {measurement, IP flows, maximum likelihood estimation, measurement errors, sampling},
	pages = {933--946},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\LPFTQIU8\\Duffield et al. - 2005 - Estimating Flow Distributions from Sampled Flow St.pdf:application/pdf}
}

@article{sharma_estimating_2006-1,
	title = {Estimating {Network} {Proximity} and {Latency}},
	volume = {36},
	issn = {0146-4833},
	doi = {10.1145/1140086.1140092},
	abstract = {Network proximity and latency estimation is an important component in discovering and locating services and applications. With the growing number of services and service providers in the large-scale Internet, accurately estimating network proximity/latency with minimal probing overhead becomes essential for scalable deployment. Although there exist a number of network distance estimation schemes, they either rely on extensive infrastructure support, require the IP address of the potential targets, falsely cluster distant nodes, or perform poorly with even few measurement errors. We propose Netvigator, a scalable network proximity and latency estimation tool that uses information obtained from probing a small number of landmark nodes and intermediate routers (termed milestones) that are discovered en route to the landmarks, to identify the closest nodes. With very little additional probing overhead, Netvigator uses distance information to the milestones to accurately locate the closest nodes. We developed a Netvigator prototype and report our performance evaluation on PlanetLab and in the intranet of a large enterprise. Netvigator is a running service on PlanetLab as a part of HP Labs' S3 (Scalable Sensing Service).},
	number = {3},
	urldate = {2018-04-15},
	journal = {SIGCOMM Comput. Commun. Rev.},
	author = {Sharma, Puneet and Xu, Zhichen and Banerjee, Sujata and Lee, Sung-Ju},
	month = jul,
	year = {2006},
	keywords = {network measurement, network distance estimation},
	pages = {39--50},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\9EHGPTME\\Sharma et al. - 2006 - Estimating Network Proximity and Latency.pdf:application/pdf}
}

@inproceedings{li_flowradar:_2016,
	title = {{FlowRadar}: {A} {Better} {NetFlow} for {Data} {Centers}},
	isbn = {978-1-931971-29-4},
	shorttitle = {{FlowRadar}},
	abstract = {NetFlow has been a widely used monitoring tool with a variety of applications. NetFlow maintains an active working set of flows in a hash table that supports flow insertion, collision resolution, and flow removing. This is hard to implement in merchant silicon at data center switches, which has limited per-packet processing time. Therefore, many NetFlow implementations and other monitoring solutions have to sample or select a subset of packets to monitor. In this paper, we observe the need to monitor all the flows without sampling in short time scales. Thus, we design FlowRadar, a new way to maintain flows and their counters that scales to a large number of flows with small memory and bandwidth overhead. The key idea of FlowRadar is to encode perflow counters with a small memory and constant insertion time at switches, and then to leverage the computing power at the remote collector to perform network-wide decoding and analysis of the flow counters. Our evaluation shows that the memory usage of FlowRadar is close to traditional NetFlow with perfect hashing. With FlowRadar, operators can get better views into their networks as demonstrated by two new monitoring applications we build on top of FlowRadar.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the 13th {Usenix} {Conference} on {Networked} {Systems} {Design} and {Implementation}},
	author = {Li, Yuliang and Miao, Rui and Kim, Changhoon and Yu, Minlan},
	year = {2016},
	keywords = {network-wide traffic measurement},
	pages = {311--324},
	file = {Li 等。 - FlowRadar A Better NetFlow for Data Centers.pdf:C\:\\Users\\zzy\\Zotero\\storage\\PC64D65Z\\Li 等。 - FlowRadar A Better NetFlow for Data Centers.pdf:application/pdf}
}

@article{gursun_inferring_2012,
	title = {Inferring {Visibility}: {Who}'s ({Not}) {Talking} to {Whom}?},
	volume = {42},
	issn = {0146-4833},
	shorttitle = {Inferring {Visibility}},
	doi = {10.1145/2377677.2377713},
	abstract = {Consider this simple question: how can a network operator identify the set of routes that pass through its network? Answering this question is surprisingly hard: BGP only informs an operator about a limited set of routes. By observing traffic, an operator can only conclude that a particular route passes through its network -- but not that a route does not pass through its network. We approach this problem as one of statistical inference, bringing varying levels of additional information to bear: (1) the existence of traffic, and (2) the limited set of publicly available routing tables. We show that the difficulty depends critically on the position of the network in the overall Internet topology, and that the operators with the greatest incentive to solve this problem are those for which the problem is hardest. Nonetheless, we show that suitable application of nonparametric inference techniques can solve this problem quite accurately. For certain networks, traffic existence information yields good accuracy, while for other networks an accurate approach uses the "distance" between prefixes, according to a new network distance metric that we define. We then show how solving this problem leads to improved solutions for a particular application: traffic matrix completion.},
	number = {4},
	urldate = {2018-04-15},
	journal = {SIGCOMM Comput. Commun. Rev.},
	author = {Gürsun, Gonca and Ruchansky, Natali and Terzi, Evimaria and Crovella, Mark},
	month = aug,
	year = {2012},
	keywords = {bgp, matrix completion},
	pages = {151--162},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\SES47G2R\\Gürsun et al. - 2012 - Inferring Visibility Who's (Not) Talking to Whom.pdf:application/pdf}
}

@inproceedings{eriksson_network_2008,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '08},
	title = {Network {Discovery} from {Passive} {Measurements}},
	isbn = {978-1-60558-175-0},
	doi = {10.1145/1402958.1402992},
	abstract = {Understanding the Internet's structure through empirical measurements is important in the development of new topology generators, new protocols, traffic engineering, and troubleshooting, among other things. While prior studies of Internet topology have been based on active (traceroute-like) measurements, passive measurements of packet traffic offer the possibility of a greatly expanded perspective of Internet structure with much lower impact and management overhead. In this paper we describe a methodology for inferring network structure from passive measurements of IP packet traffic. We describe algorithms that enable 1) traffic sources that share network paths to be clustered accurately without relying on IP address or autonomous system information, 2) topological structure to be inferred accurately with only a small number of active measurements, 3) missing information to be recovered, which is a serious challenge in the use of passive packet measurements. We demonstrate our techniques using a series of simulated topologies and empirical data sets. Our experiments show that the clusters established by our method closely correspond to sources that actually share paths. We also show the trade-offs between selectively applied active probes and the accuracy of the inferred topology between sources. Finally, we characterize the degree to which missing information can be recovered from passive measurements, which further enhances the accuracy of the inferred topologies.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2008 {Conference} on {Data} {Communication}},
	publisher = {ACM},
	author = {Eriksson, Brian and Barford, Paul and Nowak, Robert},
	year = {2008},
	keywords = {measurement, embedding, imputation, inference, topology},
	pages = {291--302},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\VKCHXD56\\Eriksson et al. - 2008 - Network Discovery from Passive Measurements.pdf:application/pdf}
}

@inproceedings{dave_measuring_2012,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '12},
	title = {Measuring and {Fingerprinting} {Click}-spam in {Ad} {Networks}},
	isbn = {978-1-4503-1419-0},
	doi = {10.1145/2342356.2342394},
	abstract = {Advertising plays a vital role in supporting free websites and smartphone apps. Click-spam, i.e., fraudulent or invalid clicks on online ads where the user has no actual interest in the advertiser's site, results in advertising revenue being misappropriated by click-spammers. While ad networks take active measures to block click-spam today, the effectiveness of these measures is largely unknown. Moreover, advertisers and third parties have no way of independently estimating or defending against click-spam. In this paper, we take the first systematic look at click-spam. We propose the first methodology for advertisers to independently measure click-spam rates on their ads. We also develop an automated methodology for ad networks to proactively detect different simultaneous click-spam attacks. We validate both methodologies using data from major ad networks. We then conduct a large-scale measurement study of click-spam across ten major ad networks and four types of ads. In the process, we identify and perform in-depth analysis on seven ongoing click-spam attacks not blocked by major ad networks at the time of this writing. Our findings highlight the severity of the click-spam problem, especially for mobile ads.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2012 {Conference} on {Applications}, {Technologies}, {Architectures}, and {Protocols} for {Computer} {Communication}},
	publisher = {ACM},
	author = {Dave, Vacha and Guha, Saikat and Zhang, Yin},
	year = {2012},
	keywords = {advertising fraud, click fraud, click-spam, invalid clicks, traffic quality},
	pages = {175--186},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\C6FWWINU\\Dave et al. - 2012 - Measuring and Fingerprinting Click-spam in Ad Netw.pdf:application/pdf}
}

@inproceedings{otto_blind_2011,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '11},
	title = {On {Blind} {Mice} and the {Elephant}: {Understanding} the {Network} {Impact} of a {Large} {Distributed} {System}},
	isbn = {978-1-4503-0797-0},
	shorttitle = {On {Blind} {Mice} and the {Elephant}},
	doi = {10.1145/2018436.2018450},
	abstract = {A thorough understanding of the network impact of emerging large-scale distributed systems -- where traffic flows and what it costs -- must encompass users' behavior, the traffic they generate and the topology over which that traffic flows. In the case of BitTorrent, however, previous studies have been limited by narrow perspectives that restrict such analysis. This paper presents a comprehensive view of BitTorrent, using data from a representative set of 500,000 users sampled over a two year period, located in 169 countries and 3,150 networks. This unique perspective captures unseen trends and reveals several unexpected features of the largest peer-to-peer system. For instance, over the past year total BitTorrent traffic has increased by 12\%, driven by 25\% increases in per-peer hourly download volume despite a 10\% decrease in the average number of online peers. We also observe stronger diurnal usage patterns and, surprisingly given the bandwidth-intensive nature of the application, a close alignment between these patterns and overall traffic. Considering the aggregated traffic across access links, this has potential implications on BitTorrent-associated costs for Internet Service Providers (ISPs). Using data from a transit ISP, we find a disproportionately large impact on a commonly used burstable (95th-percentile) billing model. Last, when examining BitTorrent traffic's paths, we find that for over half its users, most network traffic never reaches large transit networks, but is instead carried by small transit ISPs. This raises questions on the effectiveness of most in-network monitoring systems to capture trends on peer-to-peer traffic and further motivates our approach.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2011 {Conference}},
	publisher = {ACM},
	author = {Otto, John S. and Sánchez, Mario A. and Choffnes, David R. and Bustamante, Fabián E. and Siganos, Georgos},
	year = {2011},
	keywords = {evaluation, internet-scale systems, peer-to-peer},
	pages = {110--121},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\EFUGVFU2\\Otto et al. - 2011 - On Blind Mice and the Elephant Understanding the .pdf:application/pdf}
}

@inproceedings{cunha_predicting_2011,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '11},
	title = {Predicting and {Tracking} {Internet} {Path} {Changes}},
	isbn = {978-1-4503-0797-0},
	doi = {10.1145/2018436.2018451},
	abstract = {This paper investigates to what extent it is possible to use traceroute-style probing for accurately tracking Internet path changes. When the number of paths is large, the usual traceroute based approach misses many path changes because it probes all paths equally. Based on empirical observations, we argue that monitors can optimize probing according to the likelihood of path changes. We design a simple predictor of path changes using a nearest neighbor model. Although predicting path changes is not very accurate, we show that it can be used to improve probe targeting. Our path tracking method, called DTrack, detects up to two times more path changes than traditional probing, with lower detection delay, as well as providing complete load-balancer information.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2011 {Conference}},
	publisher = {ACM},
	author = {Cunha, Italo and Teixeira, Renata and Veitch, Darryl and Diot, Christophe},
	year = {2011},
	keywords = {path changes, prediction, topology mapping, tracking},
	pages = {122--133},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\345YSIP9\\Cunha et al. - 2011 - Predicting and Tracking Internet Path Changes.pdf:application/pdf}
}

@inproceedings{krishnamurthy_sketch-based_2003,
	series = {{IMC} '03},
	title = {Sketch-based {Change} {Detection}: {Methods}, {Evaluation}, and {Applications}},
	isbn = {978-1-58113-773-6},
	shorttitle = {Sketch-based {Change} {Detection}},
	doi = {10.1145/948205.948236},
	abstract = {Traffic anomalies such as failures and attacks are commonplace in today's network, and identifying them rapidly and accurately is critical for large network operators. The detection typically treats the traffic as a collection of flows that need to be examined for significant changes in traffic pattern (eg, volume, number of connections). However, as link speeds and the number of flows increase, keeping per-flow state is either too expensive or too slow. We propose building compact summaries of the traffic data using the notion of sketches. We have designed a variant of the sketch data structure, k-ary sketch, which uses a constant, small amount of memory, and has constant per-record update and reconstruction cost. Its linearity property enables us to summarize traffic at various levels. We then implement a variety of time series forecast models (ARIMA, Holt-Winters, etc.) on top of such summaries and detect significant changes by looking for flows with large forecast errors. We also present heuristics for automatically configuring the model parameters. Using a large amount of real Internet traffic data from an operational tier-1 ISP, we demonstrate that our sketch-based change detection method is highly accurate, and can be implemented at low computation and memory costs. Our preliminary results are promising and hint at the possibility of using our method as a building block for network anomaly detection and traffic measurement.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the 3rd {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	author = {Krishnamurthy, Balachander and Sen, Subhabrata and Zhang, Yin and Chen, Yan},
	year = {2003},
	keywords = {change detection, network anomaly detection, time series analysis, forecasting technique, sketch and streaming algorithm},
	pages = {234--247},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\FUL5D33H\\Krishnamurthy et al. - 2003 - Sketch-based Change Detection Methods, Evaluation.pdf:application/pdf}
}

@inproceedings{quan_trinocular:_2013,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '13},
	title = {Trinocular: {Understanding} {Internet} {Reliability} {Through} {Adaptive} {Probing}},
	isbn = {978-1-4503-2056-6},
	shorttitle = {Trinocular},
	doi = {10.1145/2486001.2486017},
	abstract = {Natural and human factors cause Internet outages---from big events like Hurricane Sandy in 2012 and the Egyptian Internet shutdown in Jan. 2011 to small outages every day that go unpublicized. We describe Trinocular, an outage detection system that uses active probing to understand reliability of edge networks. Trinocular is principled: deriving a simple model of the Internet that captures the information pertinent to outages, and populating that model through long-term data, and learning current network state through ICMP probes. It is parsimonious, using Bayesian inference to determine how many probes are needed. On average, each Trinocular instance sends fewer than 20 probes per hour to each /24 network block under study, increasing Internet "background radiation" by less than 0.7\%. Trinocular is also predictable and precise: we provide known precision in outage timing and duration. Probing in rounds of 11 minutes, we detect 100\% of outages one round or longer, and estimate outage duration within one-half round. Since we require little traffic, a single machine can track 3.4M /24 IPv4 blocks, all of the Internet currently suitable for analysis. We show that our approach is significantly more accurate than the best current methods, with about one-third fewer false conclusions, and about 30\% greater coverage at constant accuracy. We validate our approach using controlled experiments, use Trinocular to analyze two days of Internet outages observed from three sites, and re-analyze three years of existing data to develop trends for the Internet.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2013 {Conference} on {SIGCOMM}},
	publisher = {ACM},
	author = {Quan, Lin and Heidemann, John and Pradkin, Yuri},
	year = {2013},
	keywords = {adaptive probing, bayesian inference, internet reliability, network outages},
	pages = {255--266},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\TX267JBT\\Quan et al. - 2013 - Trinocular Understanding Internet Reliability Thr.pdf:application/pdf}
}

@inproceedings{gill_understanding_2011,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '11},
	title = {Understanding {Network} {Failures} in {Data} {Centers}: {Measurement}, {Analysis}, and {Implications}},
	isbn = {978-1-4503-0797-0},
	shorttitle = {Understanding {Network} {Failures} in {Data} {Centers}},
	doi = {10.1145/2018436.2018477},
	abstract = {We present the first large-scale analysis of failures in a data center network. Through our analysis, we seek to answer several fundamental questions: which devices/links are most unreliable, what causes failures, how do failures impact network traffic and how effective is network redundancy? We answer these questions using multiple data sources commonly collected by network operators. The key findings of our study are that (1) data center networks show high reliability, (2) commodity switches such as ToRs and AggS are highly reliable, (3) load balancers dominate in terms of failure occurrences with many short-lived software related faults,(4) failures have potential to cause loss of many small packets such as keep alive messages and ACKs, and (5) network redundancy is only 40\% effective in reducing the median impact of failure.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2011 {Conference}},
	publisher = {ACM},
	author = {Gill, Phillipa and Jain, Navendu and Nagappan, Nachiappan},
	year = {2011},
	keywords = {data centers, network reliability},
	pages = {350--361},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\WCNIVPV5\\Gill et al. - 2011 - Understanding Network Failures in Data Centers Me.pdf:application/pdf}
}

@article{chang_leisure:_2015,
	title = {{LEISURE}: {Load}-{Balanced} {Network}-{Wide} {Traffic} {Measurement} and {Monitor} {Placement}},
	volume = {26},
	issn = {1045-9219},
	shorttitle = {{LEISURE}},
	doi = {10.1109/TPDS.2013.188},
	abstract = {Network-wide traffic measurement is of interest to network operators to uncover global network behavior for the management tasks of traffic accounting, debugging or troubleshooting, security, and traffic engineering. Increasingly, sophisticated network measurement tasks such as anomaly detection and security forensic analysis are requiring in-depth fine-grained flow-level measurements. However, performing in-depth per-flow measurements (e.g., detailed payload analysis) is often an expensive process. Given the fast-changing Internet traffic landscape and large traffic volume, a single monitor is not capable of accomplishing the measurement tasks for all applications of interest due to its resource constraint. Moreover, uncovering global network behavior requires network-wide traffic measurements at multiple monitors across the network since traffic measured at any single monitor only provides a partial view and may not be sufficient or accurate. These factors call for coordinated measurements among multiple distributed monitors. In this paper, we present a centralized optimization framework, LEISURE (Load-EqualIzed meaSUREment), for load-balancing network measurement workloads across distributed monitors. Specifically, we consider various load-balancing problems under different objectives and study their extensions to support both fixed and flexible monitor deployment scenarios. We formulate the latter flexible monitor deployment case as an MILP (Mixed Integer Linear Programming) problem and propose several heuristic algorithms to approximate the optimal solution and reduce the computation complexity. 
We evaluate LEISURE via detailed simulations on Abilene and GEANT network traces to show that LEISURE can achieve much better load-balanced performance (e.g., 4.75× smaller peak workload and 70× smaller variance in workloads) across all coordinated monitors in comparison to a naive solution (uniform assignment) to accomplish network-wide traffic measurement tasks under the fixed monitor deployment scenario. We also show that under the flexible monitor deployment setting, our heuristic solutions can achieve almost the same load-balancing performance as the optimal solution while reducing the computation times by a factor up to 22.5× in Abilene and 800× in GEANT.},
	number = {4},
	journal = {IEEE Transactions on Parallel and Distributed Systems},
	author = {Chang, C. W. and Huang, G. and Lin, B. and Chuah, C. N.},
	month = apr,
	year = {2015},
	keywords = {Abilene, Atmospheric measurements, centralized optimization framework, GEANT, global network behavior, integer programming, Internet, Internet traffic landscape, IP networks, LEISURE, linear programming, load-balanced network-wide traffic monitor placement, Load-balancing, load-balancing network measurement, load-equalized measurement, MILP, mixed integer linear programming, Monitoring, network-wide traffic measurement, Particle measurements, Routing, telecommunication traffic, Urban areas, optimization},
	pages = {1059--1070},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\HBNQKR4Q\\6573959.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\BH7QJMK7\\Chang et al. - 2015 - LEISURE Load-Balanced Network-Wide Traffic Measur.pdf:application/pdf}
}

@inproceedings{chaudet_optimal_2005,
	address = {New York, NY, USA},
	series = {{CoNEXT} '05},
	title = {Optimal {Positioning} of {Active} and {Passive} {Monitoring} {Devices}},
	isbn = {978-1-59593-197-9},
	doi = {10.1145/1095921.1095932},
	abstract = {Network measurement is essential for assessing performance issues, identifying and locating problems. Two common strategies are the passive approach that attaches specific devices to links in order to monitor the traffic that passes through the network and the active approach that generates explicit control packets in the network for measurements. One of the key issues in this domain is to minimize the overhead in terms of hardware, software, maintenance cost and additional traffic. In this paper, we study the problem of assigning tap devices for passive monitoring and beacons for active monitoring. Minimizing the number of devices and finding optimal strategic locations is a key issue, mandatory for deploying scalable monitoring platforms. In this article, we present a combinatorial view of the problem from which we derive complexity and approximability results, as well as efficient and versatile Mixed Integer Programming (MIP) formulations.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the 2005 {ACM} {Conference} on {Emerging} {Network} {Experiment} and {Technology}},
	publisher = {ACM},
	author = {Chaudet, Claude and Fleury, Eric and Lassous, Isabelle Guérin and Rivano, Hervé and Voge, Marie-Emilie},
	year = {2005},
	keywords = {active monitoring, optimization, passive monitoring},
	pages = {71--82},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\JPGFCJ6T\\Chaudet et al. - 2005 - Optimal Positioning of Active and Passive Monitori.pdf:application/pdf}
}

@inproceedings{ma_monitor_2014,
	title = {Monitor placement for maximal identifiability in network tomography},
	doi = {10.1109/INFOCOM.2014.6848079},
	abstract = {We investigate the problem of placing a given number of monitors in a communication network to identify the maximum number of link metrics from end-to-end measurements between monitors, assuming that link metrics are additive, and measurement paths cannot contain cycles. Motivated by our previous result that complete identification of all link metrics can require a large number of monitors, we focus on partial identification using a limited number of monitors. The basis to our solution is an efficient algorithm for determining all identifiable links for a given monitor placement. Based on this algorithm, we develop a polynomial-time greedy algorithm to incrementally place monitors such that each newly placed monitor maximizes the number of additional identifiable links. We prove that the proposed algorithm is optimal for 2-vertex-connected networks, and demonstrate that it is near-optimal for several real ISP topologies that are not 2-vertex-connected. Our solution provides a quantifiable tradeoff between level of identifiability and available monitor resources.},
	booktitle = {{IEEE} {INFOCOM} 2014 - {IEEE} {Conference} on {Computer} {Communications}},
	author = {Ma, L. and He, T. and Leung, K. K. and Swami, A. and Towsley, D.},
	month = apr,
	year = {2014},
	keywords = {Internet, Monitoring, Routing, Additives, communication network, computational complexity, Computers, Conferences, Electronic mail, end-to-end measurement, greedy algorithms, ISP topology, link metrics identification, maximal identifiability, Measurement, monitor placement problem, monitor resources, network theory (graphs), network tomography, partial identification, polynomial-time greedy algorithm, radio links, tomography, vertex connected network},
	pages = {1447--1455},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\BTVQUH3K\\6848079.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\5X46DGDT\\Ma et al. - 2014 - Monitor placement for maximal identifiability in n.pdf:application/pdf}
}

@inproceedings{rasley_planck:_2014,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '14},
	title = {Planck: {Millisecond}-scale {Monitoring} and {Control} for {Commodity} {Networks}},
	isbn = {978-1-4503-2836-4},
	shorttitle = {Planck},
	doi = {10.1145/2619239.2626310},
	abstract = {Software-defined networking introduces the possibility of building self-tuning networks that constantly monitor network conditions and react rapidly to important events such as congestion. Unfortunately, state-of-the-art monitoring mechanisms for conventional networks require hundreds of milliseconds to seconds to extract global network state, like link utilization or the identity of "elephant" flows. Such latencies are adequate for responding to persistent issues, e.g., link failures or long-lasting congestion, but are inadequate for responding to transient problems, e.g., congestion induced by bursty workloads sharing a link. In this paper, we present Planck, a novel network measurement architecture that employs oversubscribed port mirroring to extract network information at 280 µs--7 ms timescales on a 1 Gbps commodity switch and 275 µs--4 ms timescales on a 10 Gbps commodity switch, over 11x and 18x faster than recent approaches, respectively (and up to 291x if switch firmware allowed buffering to be disabled on some ports). To demonstrate the value of Planck's speed and accuracy, we use it to drive a traffic engineering application that can reroute congested flows in milliseconds. On a 10 Gbps commodity switch, Planck-driven traffic engineering achieves aggregate throughput within 1--4\% of optimal for most workloads we evaluated, even with flows as small as 50 MiB, an improvement of up to 53\% over previous schemes.},
	urldate = {2018-04-15},
	booktitle = {Proceedings of the 2014 {ACM} {Conference} on {SIGCOMM}},
	publisher = {ACM},
	author = {Rasley, Jeff and Stephens, Brent and Dixon, Colin and Rozner, Eric and Felter, Wes and Agarwal, Kanak and Carter, John and Fonseca, Rodrigo},
	year = {2014},
	keywords = {networking measurement, software-defined networking, traffic engineering, software defined networks},
	pages = {407--418},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\8QNKSQFY\\Rasley et al. - 2014 - Planck Millisecond-scale Monitoring and Control f.pdf:application/pdf;ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\LFF4JTEH\\Rasley et al. - 2014 - Planck Millisecond-scale Monitoring and Control f.pdf:application/pdf;Rasley 等。 - 2014 - Planck millisecond-scale monitoring and control f.pdf:C\:\\Users\\zzy\\Zotero\\storage\\T8BWXN4M\\Rasley 等。 - 2014 - Planck millisecond-scale monitoring and control f.pdf:application/pdf}
}

@inproceedings{suh_opensample:_2014,
	title = {{OpenSample}: {A} {Low}-{Latency}, {Sampling}-{Based} {Measurement} {Platform} for {Commodity} {SDN}},
	shorttitle = {{OpenSample}},
	doi = {10.1109/ICDCS.2014.31},
	abstract = {In this paper we propose, implement and evaluate OpenSample: a low-latency, sampling-based network measurement platform targeted at building faster control loops for software-defined networks. OpenSample leverages sFlow packet sampling to provide near-real-time measurements of both network load and individual flows. While OpenSample is useful in any context, it is particularly useful in an SDN environment where a network controller can quickly take action based on the data it provides. Using sampling for network monitoring allows OpenSample to have a 100 millisecond control loop rather than the 1-5 second control loop of prior polling-based approaches. We implement OpenSample in the Floodlight Open Flow controller and evaluate it both in simulation and on a test bed comprised of commodity switches. When used to inform traffic engineering, OpenSample provides up to a 150\% throughput improvement over both static equal-cost multi-path routing and a polling-based solution with a one second control loop.},
	booktitle = {2014 {IEEE} 34th {International} {Conference} on {Distributed} {Computing} {Systems}},
	author = {Suh, J. and Kwon, T. T. and Dixon, C. and Felter, W. and Carter, J.},
	month = jun,
	year = {2014},
	keywords = {Throughput, traffic engineering, Monitoring, telecommunication traffic, Measurement, commodity SDN, commodity switches, computer networks, control loops, Control systems, Data Center, Floodlight open flow controller, Maximum likelihood estimation, near-real-time measurements, network controller, network load, network monitoring, Network topology, polling-based approaches, Ports (Computers), program control structures, Radiation detectors, SDN environment, sFlow, static equal-cost multipath routing, telecommunication network routing, throughput improvement, Traffic Engineering, sampling, software defined networks},
	pages = {228--237},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\UYD58TPN\\6888899.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\NPQIP6ED\\Suh et al. - 2014 - OpenSample A Low-Latency, Sampling-Based Measurem.pdf:application/pdf;Suh 等。 - 2014 - OpenSample A Low-Latency, Sampling-Based Measurem.pdf:C\:\\Users\\zzy\\Zotero\\storage\\HYM5BCIU\\Suh 等。 - 2014 - OpenSample A Low-Latency, Sampling-Based Measurem.pdf:application/pdf}
}

@inproceedings{estan_new_2002,
	series = {{SIGCOMM} '02},
	title = {New {Directions} in {Traffic} {Measurement} and {Accounting}},
	isbn = {978-1-58113-570-1},
	doi = {10.1145/633025.633056},
	abstract = {Accurate network traffic measurement is required for accounting, bandwidth provisioning and detecting DoS attacks. These applications see the traffic as a collection of flows they need to measure. As link speeds and the number of flows increase, keeping a counter for each flow is too expensive (using SRAM) or slow (using DRAM). The current state-of-the-art methods (Cisco's sampled NetFlow) which log periodically sampled packets are slow, inaccurate and resource-intensive. Previous work showed that at different granularities a small number of "heavy hitters" accounts for a large share of traffic. Our paper introduces a paradigm shift for measurement by concentrating only on large flows --- those above some threshold such as 0.1\% of the link capacity. We propose two novel and scalable algorithms for identifying the large flows: sample and hold and multistage filters, which take a constant number of memory references per packet and use a small amount of memory. If \$M\$ is the available memory, we show analytically that the errors of our new algorithms are proportional to \$1/M\$; by contrast, the error of an algorithm based on classical sampling is proportional to \$1/{\textbackslash}sqrt\{M\}\$, thus providing much less accuracy for the same amount of memory. We also describe further optimizations such as early removal and conservative update that further improve the accuracy of our algorithms, as measured on real traffic traces, by an order of magnitude. Our schemes allow a new form of accounting called threshold accounting in which only flows above a threshold are charged by usage while the rest are charged a fixed fee. Threshold accounting generalizes usage-based and duration based pricing.},
	urldate = {2018-04-16},
	booktitle = {Proceedings of the 2002 {Conference} on {Applications}, {Technologies}, {Architectures}, and {Protocols} for {Computer} {Communications}},
	author = {Estan, Cristian and Varghese, George},
	year = {2002},
	keywords = {identifying large flows, network traffic measurement, on-line algorithms, scalability, usage based accounting},
	pages = {323--336},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\9JQK4BSY\\Estan and Varghese - 2002 - New Directions in Traffic Measurement and Accounti.pdf:application/pdf;Estan and Varghese - New Directions in Traffic Measurement and Accounti.pdf:C\:\\Users\\zzy\\Zotero\\storage\\39YCWT83\\Estan and Varghese - New Directions in Traffic Measurement and Accounti.pdf:application/pdf}
}

@inproceedings{khan_streaming_2011,
	address = {Washington, DC, USA},
	series = {{ANCS} '11},
	title = {Streaming {Solutions} for {Fine}-{Grained} {Network} {Traffic} {Measurements} and {Analysis}},
	isbn = {978-0-7695-4521-9},
	doi = {10.1109/ANCS.2011.45},
	abstract = {Streaming network traffic measurements and analysis is critical for detecting and preventing any real-time anomalies in the network. The high speeds and complexity of today's network make the traditional slow open-loop measurement schemes infeasible. We propose an alternate closed-loop measurement paradigm and demonstrate its practical realization. To the heart of our solution are three streaming algorithms that provide a tight integration between the measurement platform and the measurements. The algorithms cater to varying degrees of computational budgets, detection latency, and accuracy. We empirically evaluate our streaming solutions on a highly parallel and programmable measurement platform. The algorithms demonstrate a marked 100\% accuracy increase from a recently proposed MRT algorithm in detecting DoS attacks made up of synthetic hard-to-track elephant flows. Our proposed algorithms maintain the worst case complexities of the MRT, while empirically demonstrating a moderate increase in average resource utilization.},
	booktitle = {Proceedings of the 2011 {ACM}/{IEEE} {Seventh} {Symposium} on {Architectures} for {Networking} and {Communications} {Systems}},
	publisher = {IEEE Computer Society},
	author = {Khan, Faisal and Hosein, Nicholas and Chuah, Chen-Nee and Ghiasi, Soheil},
	year = {2011},
	keywords = {Algorithm design and analysis, Network Anomaly Detection, Network Management, Real-time embedded systems, sketch and streaming algorithm},
	pages = {227--238}
}

@inproceedings{moshref_resource/accuracy_2013,
	address = {New York, NY, USA},
	series = {{HotSDN} '13},
	title = {Resource/{Accuracy} {Tradeoffs} in {Software}-defined {Measurement}},
	isbn = {978-1-4503-2178-5},
	doi = {10.1145/2491185.2491196},
	abstract = {Previous work on network measurements have explored several primitives of increasing complexity for measurement tasks at individual nodes, ranging from counters to hashing to arbitrary code fragments. In an SDN network, these primitives may require significant bandwidth, memory and processing resources, and the resources dedicated to these can affect the accuracy of the eventual measurement. In this paper, we first qualitatively discuss the tradeoff space of resource usage versus accuracy for these different primitives as a function of the spatial and temporal measurement granularity, then quantify these tradeoffs in the context of hierarchical heavy hitter detection.},
	booktitle = {Proceedings of the {Second} {ACM} {SIGCOMM} {Workshop} on {Hot} {Topics} in {Software} {Defined} {Networking}},
	publisher = {ACM},
	author = {Moshref, Masoud and Yu, Minlan and Govindan, Ramesh},
	year = {2013},
	keywords = {data center, hierarchical heavy hitter, software defined measurement, software defined networks},
	pages = {73--78}
}

@inproceedings{lakshminarayanan_bandwidth_2004,
	address = {New York, NY, USA},
	series = {{IMC} '04},
	title = {Bandwidth {Estimation} in {Broadband} {Access} {Networks}},
	isbn = {978-1-58113-821-4},
	doi = {10.1145/1028788.1028832},
	abstract = {There has been much work on developing techniques for estimating the capacity and the available bandwidth of network paths based on end-point measurements. The focus has primarily been on settings where the constrained link can be modeled as a point-to-point link with a well-defined bandwidth, serving packets in FIFO order. In this paper, we point out that broadband access networks, such as cable modem and 802.11-based wireless networks, break this model in various ways. The constrained link could (a) employ mechanisms such as token bucket rate regulation, (b) schedule packets in a non-FIFO manner, and (c) support multiple distinct rates. We study how these characteristics impede the operation of the various existing methods and tools for capacity and available bandwidth estimation, and present a new available bandwidth estimation technique, \textit{Probe- Gap}, that overcomes some of these difficulties. Our evaluation is based on experiments with actual 802.11a and cable modem links.},
	booktitle = {Proceedings of the 4th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Lakshminarayanan, Karthik and Padmanabhan, Venkata N. and Padhye, Jitendra},
	year = {2004},
	keywords = {network measurement, broadband networks, available bandwidth, capacity},
	pages = {314--321}
}

@inproceedings{ben_basat_constant_2017,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '17},
	title = {Constant {Time} {Updates} in {Hierarchical} {Heavy} {Hitters}},
	isbn = {978-1-4503-4653-5},
	doi = {10.1145/3098822.3098832},
	abstract = {Monitoring tasks, such as anomaly and DDoS detection, require identifying frequent flow aggregates based on common IP prefixes. These are known as hierarchical heavy hitters (HHH), where the hierarchy is determined based on the type of prefixes of interest in a given application. The per packet complexity of existing HHH algorithms is proportional to the size of the hierarchy, imposing significant overheads. In this paper, we propose a randomized constant time algorithm for HHH. We prove probabilistic precision bounds backed by an empirical evaluation. Using four real Internet packet traces, we demonstrate that our algorithm indeed obtains comparable accuracy and recall as previous works, while running up to 62 times faster. Finally, we extended Open vSwitch (OVS) with our algorithm and showed it is able to handle 13.8 million packets per second. In contrast, incorporating previous works in OVS only obtained 2.5 times lower throughput.},
	booktitle = {Proceedings of the {Conference} of the {ACM} {Special} {Interest} {Group} on {Data} {Communication}},
	publisher = {ACM},
	author = {Ben Basat, Ran and Einziger, Gil and Friedman, Roy and Luizelli, Marcelo C. and Waisbard, Erez},
	year = {2017},
	keywords = {Monitoring, Measurement, Heavy Hitters, sketch and streaming algorithm},
	pages = {127--140}
}

@inproceedings{sekar_csamp:_2008,
	address = {Berkeley, CA, USA},
	series = {{NSDI}'08},
	title = {{CSAMP}: {A} {System} for {Network}-wide {Flow} {Monitoring}},
	shorttitle = {{CSAMP}},
	abstract = {Critical network management applications increasingly demand fine-grained flow level measurements. However, current flow monitoring solutions are inadequate for many of these applications. In this paper, we present the design, implementation, and evaluation of CSAMP, a system-wide approach for flow monitoring. The design of CSAMP derives from three key ideas: flow sampling as a router primitive instead of uniform packet sampling; hash-based packet selection to achieve coordination without explicit communication; and a framework for distributing responsibilities across routers to achieve network-wide monitoring goals while respecting router resource constraints. We show that CSAMP achieves much greater monitoring coverage, better use of router resources, and enhanced ability to satisfy network-wide flow monitoring goals compared to existing solutions.},
	booktitle = {Proceedings of the 5th {USENIX} {Symposium} on {Networked} {Systems} {Design} and {Implementation}},
	publisher = {USENIX Association},
	author = {Sekar, Vyas and Reiter, Michael K. and Willinger, Walter and Zhang, Hui and Kompella, Ramana Rao and Andersen, David G.},
	year = {2008},
	keywords = {network-wide traffic measurement},
	pages = {233--246}
}

@inproceedings{ramachandran_fast_2008,
	address = {New York, NY, USA},
	series = {{IMC} '08},
	title = {Fast {Monitoring} of {Traffic} {Subpopulations}},
	isbn = {978-1-60558-334-1},
	doi = {10.1145/1452520.1452551},
	abstract = {Network accounting, forensics, security, and performance monitoring applications often need to examine detailed traces from subsets of flows ("subpopulations"), where the application desires flexibility in specifying the subpopulation (e.g., to detect a portscan, the application must observe many packets between a source and a destination with one packet to each port). However, the dynamism and volume of network traffic on many high-speed links necessitates traffic sampling, which adversely affects subpopulation monitoring: because many subpopulations of interest to operators are low-volume flows, conventional sampling schemes (e.g., uniform random sampling) miss much of the subpopulation's traffic. Today's routers and network devices provide scant support for monitoring specific traffic subpopulations. This paper presents the design, implementation, and evaluation of FlexSample, a traffic monitoring engine that dynamically extracts traffic from subpopulations that operators define using conditions on packet header fields. FlexSample uses a fast, flexible counter array to provide rough estimates of packets' membership in respective subpopulations. Based on these coarse estimates, FlexSample then makes per-packet sampling decisions to sample proportionately from each subpopulation (as specified by a network operator), subject to an overall sampling constraint. We apply FlexSample to extract subpopulations such as port scans and traffic to high-degree nodes and find that it is able to capture significantly more packets from these subpopulations than conventional approaches.},
	booktitle = {Proceedings of the 8th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Ramachandran, Anirudh and Seetharaman, Srinivasan and Feamster, Nick and Vazirani, Vijay},
	year = {2008},
	keywords = {counters, sampling, traffic statistics, traffic subpopulations},
	pages = {257--270}
}

@article{cormode_finding_2008,
	title = {Finding {Frequent} {Items} in {Data} {Streams}},
	volume = {1},
	issn = {2150-8097},
	doi = {10.14778/1454159.1454225},
	abstract = {The frequent items problem is to process a stream of items and find all items occurring more than a given fraction of the time. It is one of the most heavily studied problems in data stream mining, dating back to the 1980s. Many applications rely directly or indirectly on finding the frequent items, and implementations are in use in large scale industrial systems. However, there has not been much comparison of the different methods under uniform experimental conditions. It is common to find papers touching on this topic in which important related work is mischaracterized, overlooked, or reinvented. In this paper, we aim to present the most important algorithms for this problem in a common framework. We have created baseline implementations of the algorithms, and used these to perform a thorough experimental study of their properties. We give empirical evidence that there is considerable variation in the performance of frequent items algorithms. The best methods can be implemented to find frequent items with high accuracy using only tens of kilobytes of memory, at rates of millions of items per second on cheap modern hardware.},
	number = {2},
	journal = {Proc. VLDB Endow.},
	author = {Cormode, Graham and Hadjieleftheriou, Marios},
	month = aug,
	year = {2008},
	pages = {1530--1541}
}

@inproceedings{handigol_i_2014,
	address = {Berkeley, CA, USA},
	series = {{NSDI}'14},
	title = {I {Know} {What} {Your} {Packet} {Did} {Last} {Hop}: {Using} {Packet} {Histories} to {Troubleshoot} {Networks}},
	isbn = {978-1-931971-09-6},
	shorttitle = {I {Know} {What} {Your} {Packet} {Did} {Last} {Hop}},
	abstract = {The complexity of networks has outpaced our tools to debug them; today, administrators use manual tools to diagnose problems. In this paper, we show how packet histories--the full stories of every packet's journey through the network--can simplify network diagnosis. To demonstrate the usefulness of packet histories and the practical feasibility of constructing them, we built NetSight, an extensible platform that captures packet histories and enables applications to concisely and flexibly retrieve packet histories of interest. Atop NetSight, we built four applications that illustrate its flexibility: an interactive network debugger, a live invariant monitor, a path-aware history logger, and a hierarchical network profiler. On a single modern multi-core server, NetSight can process packet histories for the traffic of multiple 10 Gb/s links. For larger networks, NetSight scales linearly with additional servers and scales even further with straightforward additions to hardware- and hypervisor-based switches.},
	booktitle = {Proceedings of the 11th {USENIX} {Conference} on {Networked} {Systems} {Design} and {Implementation}},
	publisher = {USENIX Association},
	author = {Handigol, Nikhil and Heller, Brandon and Jeyakumar, Vimalkumar and Mazi{\`e}res, David and McKeown, Nick},
	year = {2014},
	pages = {71--85}
}

@inproceedings{narayana_language-directed_2017,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '17},
	title = {Language-{Directed} {Hardware} {Design} for {Network} {Performance} {Monitoring}},
	isbn = {978-1-4503-4653-5},
	doi = {10.1145/3098822.3098829},
	abstract = {Network performance monitoring today is restricted by existing switch support for measurement, forcing operators to rely heavily on endpoints with poor visibility into the network core. Switch vendors have added progressively more monitoring features to switches, but the current trajectory of adding specific features is unsustainable given the ever-changing demands of network operators. Instead, we ask what switch hardware primitives are required to support an expressive language of network performance questions. We believe that the resulting switch hardware design could address a wide variety of current and future performance monitoring needs. We present a performance query language, Marple, modeled on familiar functional constructs like map, filter, groupby, and zip. Marple is backed by a new programmable key-value store primitive on switch hardware. The key-value store performs flexible aggregations at line rate (e.g., a moving average of queueing latencies per flow), and scales to millions of keys. We present a Marple compiler that targets a P4-programmable software switch and a simulator for high-speed programmable switches. Marple can express switch queries that could previously run only on end hosts, while Marple queries only occupy a modest fraction of a switch's hardware resources.},
	booktitle = {Proceedings of the {Conference} of the {ACM} {Special} {Interest} {Group} on {Data} {Communication}},
	publisher = {ACM},
	author = {Narayana, Srinivas and Sivaraman, Anirudh and Nathan, Vikram and Goyal, Prateesh and Arun, Venkat and Alizadeh, Mohammad and Jeyakumar, Vimalkumar and Kim, Changhoon},
	year = {2017},
	keywords = {network hardware, Network measurement, network programming},
	pages = {85--98}
}

@inproceedings{benson_network_2010,
	address = {New York, NY, USA},
	series = {{IMC} '10},
	title = {Network {Traffic} {Characteristics} of {Data} {Centers} in the {Wild}},
	isbn = {978-1-4503-0483-2},
	doi = {10.1145/1879141.1879175},
	abstract = {Although there is tremendous interest in designing improved networks for data centers, very little is known about the network-level traffic characteristics of data centers today. In this paper, we conduct an empirical study of the network traffic in 10 data centers belonging to three different categories, including university, enterprise campus, and cloud data centers. Our definition of cloud data centers includes not only data centers employed by large online service providers offering Internet-facing applications but also data centers used to host data-intensive (MapReduce style) applications). We collect and analyze SNMP statistics, topology and packet-level traces. We examine the range of applications deployed in these data centers and their placement, the flow-level and packet-level transmission properties of these applications, and their impact on network and link utilizations, congestion and packet drops. We describe the implications of the observed traffic patterns for data center internal traffic engineering as well as for recently proposed architectures for data center networks.},
	booktitle = {Proceedings of the 10th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Benson, Theophilus and Akella, Aditya and Maltz, David A.},
	year = {2010},
	keywords = {characterization, data center traffic, characteristics of network traffic},
	pages = {267--280}
}

@inproceedings{jose_online_2011,
	address = {Berkeley, CA, USA},
	series = {Hot-{ICE}'11},
	title = {Online {Measurement} of {Large} {Traffic} {Aggregates} on {Commodity} {Switches}},
	url = {http://dl.acm.org/citation.cfm?id=1972422.1972439},
	abstract = {Traffic measurement plays an important role in many network-management tasks, such as anomaly detection and traffic engineering. However, existing solutions either rely on custom hardware designed for a specific task, or introduce a high overhead for data collection and analysis. Instead, we argue that a practical traffic-measurement solution should run on commodity network elements, support a range of measurement tasks, and provide accurate results with low overhead. Inspired by the capabilities of OpenFlow switches, we explore a measurement framework where switches match packets against a small collection of rules and update traffic counters for the highest-priority match. A separate controller can read the counters and dynamically tune the rules to quickly "drill down" to identify large traffic aggregates. As the first step towards designing measurement algorithms for this framework, we design and evaluate a hierarchical heavy hitters algorithm that identifies large traffic aggregates, while striking a good balance between measurement accuracy and switch overhead.},
	urldate = {2018-04-16},
	booktitle = {Proceedings of the 11th {USENIX} {Conference} on {Hot} {Topics} in {Management} of {Internet}, {Cloud}, and {Enterprise} {Networks} and {Services}},
	publisher = {USENIX Association},
	author = {Jose, Lavanya and Yu, Minlan and Rexford, Jennifer},
	year = {2011},
	pages = {13--13}
}

@inproceedings{guo_pingmesh:_2015,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '15},
	title = {Pingmesh: {A} {Large}-{Scale} {System} for {Data} {Center} {Network} {Latency} {Measurement} and {Analysis}},
	isbn = {978-1-4503-3542-3},
	shorttitle = {Pingmesh},
	doi = {10.1145/2785956.2787496},
	abstract = {Can we get network latency between any two servers at any time in large-scale data center networks? The collected latency data can then be used to address a series of challenges: telling if an application perceived latency issue is caused by the network or not, defining and tracking network service level agreement (SLA), and automatic network troubleshooting. We have developed the Pingmesh system for large-scale data center network latency measurement and analysis to answer the above question affirmatively. Pingmesh has been running in Microsoft data centers for more than four years, and it collects tens of terabytes of latency data per day. Pingmesh is widely used by not only network software developers and engineers, but also application and service developers and operators.},
	booktitle = {Proceedings of the 2015 {ACM} {Conference} on {Special} {Interest} {Group} on {Data} {Communication}},
	publisher = {ACM},
	author = {Guo, Chuanxiong and Yuan, Lihua and Xiang, Dong and Dang, Yingnong and Huang, Ray and Maltz, Dave and Liu, Zhaoyi and Wang, Vin and Pang, Bin and Chen, Hua and Lin, Zhi-Wei and Kurien, Varugis},
	year = {2015},
	keywords = {data center networking, network troubleshooting, silent packet drops},
	pages = {139--152}
}

@inproceedings{yuan_progme:_2007,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '07},
	title = {{ProgME}: {Towards} {Programmable} {Network} {Measurement}},
	isbn = {978-1-59593-713-1},
	shorttitle = {{ProgME}},
	doi = {10.1145/1282380.1282392},
	abstract = {Traffic measurements provide critical input for a wide range of network management applications, including traffic engineering, accounting, and security analysis. Existing measurement tools collect traffic statistics based on some pre-determined, inflexible concept of "flows". They do not have sufficient built-in intelligence to understand the application requirements or adapt to the traffic conditions. Consequently, they have limited scalability with respect to the number of flows and the heterogeneity of monitoring applications. We present ProgME, a Programmable MEasurement architecture based on a novel concept of flowset - arbitrary set of flows defined according to application requirements and/or traffic conditions. Through a simple flowset composition language, ProgME can incorporate application requirements, adapt itself to circumvent the challenges on scalability posed by the large number of flows, and achieve a better application-perceived accuracy. ProgME can analyze and adapt to traffic statistics in real-time. Using sequential hypothesis test, ProgME can achieve fast and scalable heavy hitter identification.},
	booktitle = {Proceedings of the 2007 {Conference} on {Applications}, {Technologies}, {Architectures}, and {Protocols} for {Computer} {Communications}},
	publisher = {ACM},
	author = {Yuan, Lihua and Chuah, Chen-Nee and Mohapatra, Prasant},
	year = {2007},
	keywords = {flowset, flowset composition language, multi-resolution tiling, programmable measurement, traffic measurement},
	pages = {97--108}
}

@inproceedings{alipourfard_re-evaluating_2015,
	address = {New York, NY, USA},
	series = {{HotNets}-{XIV}},
	title = {Re-evaluating {Measurement} {Algorithms} in {Software}},
	isbn = {978-1-4503-4047-2},
	doi = {10.1145/2834050.2834064},
	abstract = {With the advancement of multicore servers, there is a new trend of moving network functions to software servers. Measurement is critical to most network functions as it not only helps the operators understand the network usage and detect anomalies, but also produces feedback to the control loop in management tasks such as load balancing and traffic engineering. Traditional researches on measurement algorithms mainly focus on reducing the memory usage leveraging the fact that measurement can sustain bounded inaccuracy. In this study, we re-evaluate these algorithms on software servers in order to understand their tradeoffs of accuracy and performance. We observe that simple hash tables work better than more advanced measurement algorithms for a variety of measurement scenarios. This is because with better cache design in modern servers and the skewness in the access patterns of measurement tasks, the memory usage of measurement tasks is largely irrelevant to the packet processing performance.},
	booktitle = {Proceedings of the 14th {ACM} {Workshop} on {Hot} {Topics} in {Networks}},
	publisher = {ACM},
	author = {Alipourfard, Omid and Moshref, Masoud and Yu, Minlan},
	year = {2015},
	pages = {20:1--20:7}
}

@inproceedings{sekar_revisiting_2010,
	address = {New York, NY, USA},
	series = {{IMC} '10},
	title = {Revisiting the {Case} for a {Minimalist} {Approach} for {Network} {Flow} {Monitoring}},
	isbn = {978-1-4503-0483-2},
	doi = {10.1145/1879141.1879186},
	abstract = {Network management applications require accurate estimates of a wide range of flow-level traffic metrics. Given the inadequacy of current packet-sampling-based solutions, several application-specific monitoring algorithms have emerged. While these provide better accuracy for the specific applications they target, they increase router complexity and require vendors to commit to hardware primitives without knowing how useful they will be to meet the needs of future applications. In this paper, we show using trace-driven evaluations that such complexity and early commitment may not be necessary. We revisit the case for a "minimalist" approach in which a small number of simple yet generic router primitives collect flow-level data from which different traffic metrics can be estimated. We demonstrate the feasibility and promise of such a minimalist approach using flow sampling and sample-and-hold as sampling primitives and configuring these in a network-wide coordinated fashion using cSamp. We show that this proposal yields better accuracy across a collection of application-level metrics than dividing the same memory resources across metric-specific algorithms. Moreover, because a minimalist approach enables late binding to what application level metrics are important, it better insulates router implementations and deployments from changing monitoring needs.},
	booktitle = {Proceedings of the 10th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Sekar, Vyas and Reiter, Michael K. and Zhang, Hui},
	year = {2010},
	keywords = {sampling, anomaly detection, traffic monitoring, sketch and streaming algorithm},
	pages = {328--341}
}

@inproceedings{schweller_reversible_2004,
	address = {New York, NY, USA},
	series = {{IMC} '04},
	title = {Reversible {Sketches} for {Efficient} and {Accurate} {Change} {Detection} over {Network} {Data} {Streams}},
	isbn = {978-1-58113-821-4},
	doi = {10.1145/1028788.1028814},
	abstract = {Traffic anomalies such as failures and attacks are increasing in frequency and severity, and thus identifying them rapidly and accurately is critical for large network operators. The detection typically treats the traffic as a collection of flows and looks for heavy changes in traffic patterns (\textit{e.g.}, volume, number of connections). However, as link speeds and the number of flows increase, keeping per-flow state is not scalable. The recently proposed sketch-based schemes [14] are among the very few that can detect heavy changes and anomalies over massive data streams at network traffic speeds. However, sketches do not preserve the key (\textit{e.g.}, source IP address) of the flows. Hence, even if anomalies are detected, it is difficult to infer the culprit flows, making it a big practical hurdle for online deployment. Meanwhile, the number of keys is too large to record. To address this challenge, we propose efficient \textit{reversible hashing} algorithms to infer the keys of culprit flows from sketches without storing any explicit key information. No extra memory or memory accesses are needed for recording the streaming data. Meanwhile, the heavy change detection daemon runs in the background with space complexity and computational time sublinear to the key space size. This short paper describes the conceptual framework of the reversible sketches, as well as some initial approaches for implementation. See [23] for the optimized algorithms in details. We further apply various \textit{IP-mangling} algorithms and \textit{bucket classification} methods to reduce the false positives and false negatives. Evaluated with netflow traffic traces of a large edge router, we demonstrate that the reverse hashing can quickly infer the keys of culprit flows even for many changes with high accuracy.},
	urldate = {2018-04-16},
	booktitle = {Proceedings of the 4th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Schweller, Robert and Gupta, Ashish and Parsons, Elliot and Chen, Yan},
	year = {2004},
	keywords = {change detection, network anomaly detection, IP mangling, modular hashing, reverse hashing, sketch and streaming algorithm},
	pages = {207--212},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\82P2XZ9E\\Schweller et al. - 2004 - Reversible Sketches for Efficient and Accurate Cha.pdf:application/pdf;Schweller 等。 - 2004 - Reversible sketches for efficient and accurate cha.pdf:C\:\\Users\\zzy\\Zotero\\storage\\ANPQEW4C\\Schweller 等。 - 2004 - Reversible sketches for efficient and accurate cha.pdf:application/pdf}
}

@inproceedings{wang_sflow:_2004,
	address = {Washington, DC, USA},
	series = {{ICDCS} '04},
	title = {{sFlow}: {Towards} {Resource}-{Efficient} and {Agile} {Service} {Federation} in {Service} {Overlay} {Networks}},
	isbn = {978-0-7695-2086-5},
	shorttitle = {{sFlow}},
	abstract = {Existing research work towards the composition of complex federated services has assumed that service requests and deliveries flow through a particular service path or tree. In this paper, we extend such a service model to a directed acyclic graph, allowing services to be delivered via parallel paths and interleaved with each other. Such an assumption of the service flow model has apparently introduced complexities towards the development of a distributed algorithm to federate existing services, as well as the provisioning of the required quality in the most resource-efficient fashion. To this end, we propose sFlow, a fully distributed algorithm to be executed on all service nodes, such that the federated service flow graph is resource efficient, performs well, and meets the demands of service consumers.},
	urldate = {2018-04-16},
	booktitle = {Proceedings of the 24th {International} {Conference} on {Distributed} {Computing} {Systems} ({ICDCS}'04)},
	publisher = {IEEE Computer Society},
	author = {Wang, Mea and Li, Baochun and Li, Zongpeng},
	year = {2004},
	pages = {628--635},
	file = {Wang 等。 - 2004 - sFlow towards resource-efficient and agile servic.pdf:C\:\\Users\\zzy\\Zotero\\storage\\589E9AXX\\Wang 等。 - 2004 - sFlow towards resource-efficient and agile servic.pdf:application/pdf}
}

@inproceedings{raspall_adaptive_2008,
	address = {New York, NY, USA},
	series = {{IMC} '08},
	title = {Adaptive {Shared}-state {Sampling}},
	isbn = {978-1-60558-334-1},
	doi = {10.1145/1452520.1452552},
	abstract = {We present two algorithms to the problem of identifying and measuring heavy-hitters. Our schemes report, with high probability, those flows that exceed a prescribed share of the traffic observed so far; along with an estimate of their sizes. One of the biggest advantages of our schemes is that they entirely rely on sampling. This makes them flexible and lightweight, permits implementing them in cheap DRAM and scale to very high speeds. Despite sampling, our algorithms can provide very accurate results and offer performance guarantees independent of the traffic mix. Most remarkably, the schemes are shown to require memory that is constant regardless of the volume and composition of the traffic observed. Thus, besides computationally light, cost-effective and flexible, they are scalable and robust against malicious traffic patterns. We provide theoretical and empirical results on their performance; the latter, with software implementations and real traffic traces.},
	urldate = {2018-04-16},
	booktitle = {Proceedings of the 8th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Raspall, Frederic and Sallent, Sebastia},
	year = {2008},
	keywords = {scalability, sampling, frequent items, heavy-hitters},
	pages = {271--284},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\CZFNUS8N\\Raspall and Sallent - 2008 - Adaptive Shared-state Sampling.pdf:application/pdf}
}

@inproceedings{yu_software_2013,
	title = {Software {Defined} {Traffic} {Measurement} with {OpenSketch}},
	abstract = {Most network management tasks in software-defined networks (SDN) involve two stages: measurement and control. While many efforts have been focused on network control APIs for SDN, little attention goes into measurement. The key challenge of designing a new measurement API is to strike a careful balance between generality (supporting a wide variety of measurement tasks) and efficiency (enabling high link speed and low cost). We propose a software defined traffic measurement architecture OpenSketch, which separates the measurement data plane from the control plane. In the data plane, OpenSketch provides a simple three-stage pipeline (hashing, filtering, and counting), which can be implemented with commodity switch components and support many measurement tasks. In the control plane, OpenSketch provides a measurement library that automatically configures the pipeline and allocates resources for different measurement tasks. Our evaluations of real-world packet traces, our prototype on NetFPGA, and the implementation of five measurement tasks on top of OpenSketch, demonstrate that OpenSketch is general, efficient and easily programmable.},
	urldate = {2018-04-16},
	booktitle = {Proceedings of the 10th {USENIX} {Conference} on {Networked} {Systems} {Design} and {Implementation}},
	author = {Yu, Minlan and Jose, Lavanya and Miao, Rui},
	year = {2013},
	keywords = {software defined measurement, sketch and streaming algorithm},
	pages = {29--42},
	file = {Yu 等。 - Software Deﬁned Trafﬁc Measurement with OpenSketch.pdf:C\:\\Users\\zzy\\Zotero\\storage\\4RFZ28AN\\Yu 等。 - Software Deﬁned Trafﬁc Measurement with OpenSketch.pdf:application/pdf}
}

@inproceedings{chen_good_2016,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '16},
	title = {The {Good}, the {Bad}, and the {Differences}: {Better} {Network} {Diagnostics} with {Differential} {Provenance}},
	isbn = {978-1-4503-4193-6},
	shorttitle = {The {Good}, the {Bad}, and the {Differences}},
	doi = {10.1145/2934872.2934910},
	abstract = {In this paper, we propose a new approach to diagnosing problems in complex distributed systems. Our approach is based on the insight that many of the trickiest problems are anomalies. For instance, in a network, problems often affect only a small fraction of the traffic (e.g., perhaps a certain subnet), or they only manifest infrequently. Thus, it is quite common for the operator to have “examples” of both working and non-working traffic readily available – perhaps a packet that was misrouted, and a similar packet that was routed correctly. In this case, the cause of the problem is likely to be wherever the two packets were treated differently by the network. We present the design of a debugger that can leverage this information using a novel concept that we call differential provenance. Differential provenance tracks the causal connections between network states and state changes, just like classical provenance, but it can additionally perform root-cause analysis by reasoning about the differences between two provenance trees. We have built a diagnostic tool that is based on differential provenance, and we have used our tool to debug a number of complex, realistic problems in two scenarios: software-defined networks and MapReduce jobs. Our results show that differential provenance can be maintained at relatively low cost, and that it can deliver very precise diagnostic information; in many cases, it can even identify the precise root cause of the problem.},
	urldate = {2018-04-16},
	booktitle = {Proceedings of the 2016 {ACM} {SIGCOMM} {Conference}},
	publisher = {ACM},
	author = {Chen, Ang and Wu, Yang and Haeberlen, Andreas and Zhou, Wenchao and Loo, Boon Thau},
	year = {2016},
	keywords = {Debugging, Network diagnostics, Provenance},
	pages = {115--128},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\GCBW8CLS\\Chen et al. - 2016 - The Good, the Bad, and the Differences Better Net.pdf:application/pdf}
}

@article{cormode_whats_2005,
	title = {What's {New}: {Finding} {Significant} {Differences} in {Network} {Data} {Streams}},
	volume = {13},
	issn = {1063-6692},
	shorttitle = {What's {New}},
	doi = {10.1109/TNET.2005.860096},
	abstract = {Monitoring and analyzing network traffic usage patterns is vital for managing IP Networks. An important problem is to provide network managers with information about changes in traffic, informing them about "what's new." Specifically, we focus on the challenge of finding significantly large differences in traffic: over time, between interfaces and between routers. We introduce the idea of a deltoid: an item that has a large difference, whether the difference is absolute, relative or variational.We present novel algorithms for finding the most significant deltoids in high-speed traffic data, and prove that they use small space, very small time per update, and are guaranteed to find significant deltoids with pre-specified accuracy. In experimental evaluation with real network traffic, our algorithms perform well and recover almost all deltoids. This is the first work to provide solutions capable of working over the data with one pass, at network traffic speeds.},
	number = {6},
	urldate = {2018-04-16},
	journal = {IEEE/ACM Trans. Netw.},
	author = {Cormode, Graham and Muthukrishnan, S.},
	month = dec,
	year = {2005},
	keywords = {change detection, data streams, deltoids, network data analysis},
	pages = {1219--1232},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\39SD3563\\Cormode and Muthukrishnan - 2005 - What's New Finding Significant Differences in Net.pdf:application/pdf;ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\GNK2RNVP\\Cormode and Muthukrishnan - 2005 - What's New Finding Significant Differences in Net.pdf:application/pdf;Cormode 和 Muthukrishnan - 2005 - What's new finding significant differences in net.pdf:C\:\\Users\\zzy\\Zotero\\storage\\DYXBQF4Z\\Cormode 和 Muthukrishnan - 2005 - What's new finding significant differences in net.pdf:application/pdf}
}

@inproceedings{raspall_shared-state_2006,
	address = {New York, NY, USA},
	series = {{IMC} '06},
	title = {Shared-state {Sampling}},
	isbn = {978-1-59593-561-8},
	doi = {10.1145/1177080.1177082},
	abstract = {We present an algorithm, Shared-State Sampling (S3), for the problem of detecting large flows in high-speed networks. While devised with different principles in mind, (S3) turns out to be a generalization of two existing algorithms tackling the same problem: Sample-and-Hold and Multistage Filters. S3 is found to outperform its predecessors, with the advantage of smoothly adapting to the memory technology available, to the extent of allowing a partial implementation in DRAM. (S3) exhibits mild tradeoffs between the different metrics of interest, which greatly benefits the scalability of the approach. The problem of detecting frequent items in streams appears in other areas. We also compare our algorithm with proposals appearing in the context of databases and regarded superior to the aforementioned. Our analysis and experimental results show that, among those evaluated, (S3) is the most attractive and scalable solution to the problem in the context of high-speed network measurements.},
	urldate = {2018-04-16},
	booktitle = {Proceedings of the 6th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Raspall, Frederic and Sallent, Sebastia and Yufera, Josep},
	year = {2006},
	keywords = {scalability, per-flow measurements},
	pages = {1--14},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\WMXQJA2M\\Raspall et al. - 2006 - Shared-state Sampling.pdf:application/pdf;Raspall 等。 - 2006 - Shared-state sampling.pdf:C\:\\Users\\zzy\\Zotero\\storage\\Z7IJPXJQ\\Raspall 等。 - 2006 - Shared-state sampling.pdf:application/pdf}
}

@inproceedings{cantieni_reformulating_2006,
	series = {{CoNEXT} '06},
	title = {Reformulating the {Monitor} {Placement} {Problem}: {Optimal} {Network}-wide {Sampling}},
	isbn = {978-1-59593-456-7},
	shorttitle = {Reformulating the {Monitor} {Placement} {Problem}},
	doi = {10.1145/1368436.1368444},
	abstract = {Confronted with the generalization of monitoring in operational networks, researchers have proposed placement algorithms that can help ISPs deploy their monitoring infrastructure in a cost effective way, while maximizing the benefits of their infrastructure. However, a static placement of monitors cannot be optimal given the short-term and long-term variations in traffic due to re-routing events, anomalies and the normal network evolution. In addition, most ISPs already deploy router embedded monitoring functionalities. Despite some limitations (inherent to being part of a router), these monitoring tools give greater visibility on the network traffic but raise the question on how to configure a network-wide monitoring infrastructure that may contain hundreds of monitoring points. We reformulate the placement problem as follows. Given a network where all links can be monitored, which monitors should be activated and which sampling rate should be set on these monitors in order to achieve a given measurement task with high accuracy and low resource consumption? We provide a formulation of the problem, an optimal algorithm to solve it, and we study its performance on a real backbone network.},
	urldate = {2018-04-17},
	booktitle = {Proceedings of the 2006 {ACM} {CoNEXT} {Conference}},
	author = {Cantieni, Gion Reto and Iannaccone, Gianluca and Barakat, Chadi and Diot, Christophe and Thiran, Patrick},
	year = {2006},
	keywords = {network-wide traffic measurement, formulation intensive},
	pages = {5:1--5:12},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\NBINTCZQ\\Cantieni et al. - 2006 - Reformulating the Monitor Placement Problem Optim.pdf:application/pdf}
}

@article{duffield_trajectory_2001,
	title = {Trajectory {Sampling} for {Direct} {Traffic} {Observation}},
	volume = {9},
	issn = {1063-6692},
	doi = {10.1109/90.929851},
	abstract = {Traffic measurement is a critical component for the control and engineering of communication networks. We argue that traffic measurement should make it possible to obtain the spatial flow of traffic through the domain, i.e., the paths followed by packets between any ingress and egress point of the domain. Most resource allocation and capacity planning tasks can benefit from such information. Also, traffic measurements should be obtained without a routing model and without knowledge of network state. This allows the traffic measurement process to be resilient to network failures and state uncertainty. We propose a method that allows the direct inference of traffic flows through a domain by observing the trajectories of a subset of all packets traversing the network. The key advantages of the method are that 1) it does not rely on routing state; 2) its implementation cost is small; and 3) the measurement reporting traffic is modest and can be controlled precisely. The key idea of the method is to sample packets based on a hash function computed over the packet content. Using the same hash function will yield the same sample set of packets in the entire domain, and enables us to reconstruct packet trajectories.},
	number = {3},
	urldate = {2018-04-17},
	journal = {IEEE/ACM Trans. Netw.},
	author = {Duffield, N. G. and Grossglauser, Matthias},
	month = jun,
	year = {2001},
	keywords = {traffic engineering, sampling, Hash functions, Internet traffic measurement},
	pages = {280--292},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\W5XUGD2K\\Duffield and Grossglauser - 2001 - Trajectory Sampling for Direct Traffic Observation.pdf:application/pdf}
}

@article{duffield_learn_2005,
	title = {Learn more, sample less: control of volume and variance in network measurement},
	volume = {51},
	issn = {0018-9448},
	shorttitle = {Learn more, sample less},
	doi = {10.1109/TIT.2005.846400},
	abstract = {This paper deals with sampling objects from a large stream. Each object possesses a size, and the aim is to be able to estimate the total size of an arbitrary subset of objects whose composition is not known at the time of sampling. This problem is motivated from network measurements in which the objects are flow records exported by routers and the sizes are the number of packet or bytes reported in the record. Subsets of interest could be flows from a certain customer or flows from a worm attack. This paper introduces threshold sampling as a sampling scheme that optimally controls the expected volume of samples and the variance of estimators over any classification of flows. It provides algorithms for dynamic control of sample volumes and evaluates them on flow data gathered from a commercial Internet Protocol (IP) network. The algorithms are simple to implement and robust to variation in network conditions. The work reported here has been applied in the measurement infrastructure of the commercial IP network. To not have employed sampling would have entailed an order of magnitude greater capital expenditure to accommodate the measurement traffic and its processing.},
	number = {5},
	journal = {IEEE Transactions on Information Theory},
	author = {Duffield, N. and Lund, C. and Thorup, M.},
	month = may,
	year = {2005},
	keywords = {network measurement, IP networks, sampling, Telecommunication traffic, Estimation, flows, Fluid flow measurement, Heuristic algorithms, Internet measurement, Internet protocol, IP network, Optimal control, Protocols, Robustness, Size measurement, transport protocols, variance reduction, Volume measurement},
	pages = {1756--1775},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\DBXJ96JG\\1424313.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\LE2IW3AC\\Duffield et al. - 2005 - Learn more, sample less control of volume and var.pdf:application/pdf}
}

@inproceedings{estan_building_2004,
	series = {{SIGCOMM} '04},
	title = {Building a {Better} {NetFlow}},
	isbn = {978-1-58113-862-7},
	doi = {10.1145/1015467.1015495},
	abstract = {Network operators need to determine the composition of the traffic mix on links when looking for dominant applications, users, or estimating traffic matrices. Cisco's NetFlow has evolved into a solution that satisfies this need by reporting flow records that summarize a sample of the traffic traversing the link. But sampled NetFlow has shortcomings that hinder the collection and analysis of traffic data. First, during flooding attacks router memory and network bandwidth consumed by flow records can increase beyond what is available; second, selecting the right static sampling rate is difficult because no single rate gives the right tradeoff of memory use versus accuracy for all traffic mixes; third, the heuristics routers use to decide when a flow is reported are a poor match to most applications that work with time bins; finally, it is impossible to estimate without bias the number of active flows for aggregates with non-TCP traffic. In this paper we propose Adaptive NetFlow, deployable through an update to router software, which addresses many shortcomings of NetFlow by dynamically adapting the sampling rate to achieve robustness without sacrificing accuracy. To enable counting of non-TCP flows, we propose an optional Flow Counting Extension that requires augmenting existing hardware at routers. Both our proposed solutions readily provide descriptions of the traffic of progressively smaller sizes. Transmitting these at progressively higher levels of reliability allows graceful degradation of the accuracy of traffic reports in response to network congestion on the reporting path.},
	urldate = {2018-04-17},
	booktitle = {Proceedings of the 2004 {Conference} on {Applications}, {Technologies}, {Architectures}, and {Protocols} for {Computer} {Communications}},
	author = {Estan, Cristian and Keys, Ken and Moore, David and Varghese, George},
	year = {2004},
	keywords = {network monitoring, traffic measurement, data summarization},
	pages = {245--256},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\W9Q229BG\\Estan et al. - 2004 - Building a Better NetFlow.pdf:application/pdf}
}

@inproceedings{kompella_power_2005,
	address = {Berkeley, CA, USA},
	series = {{IMC} '05},
	title = {The {Power} of {Slicing} in {Internet} {Flow} {Measurement}},
	abstract = {Network service providers use high speed flow measurement solutions in routers to track dominant applications, compute traffic matrices and to perform other such operational tasks. These solutions typically need to operate within the constraints of the three precious router resources - CPU, memory and bandwidth. Cisco's NetFlow, a widely deployed flow measurement solution, uses a configurable static sampling rate to control these resources. In this paper, we propose Flow Slices, a solution inspired from previous enhancements to NetFlow such as Smart Sampling [8], Adaptive NetFlow (ANF) [10]. Flow Slices, in contrast to NetFlow, controls the three resource bottlenecks at the router using separate "tuning knobs"; it uses packet sampling to control CPU usage, flow sampling to control memory usage and finally multi-factor smart sampling to control reporting bandwidth. The resulting solution has smaller resource requirements than current proposals (up to 80\% less memory usage than ANF), enables more accurate traffic analysis results (up to 10\% less error than ANF) and balances better the error in estimates of byte, packet and flow counts (flow count estimates up to 8 times more accurate than after Smart Sampling). We provide theoretical analyses of the unbiasedness and variances of the estimators based on Flow Slices and experimental comparisons with other flow measurement solutions such as ANF.},
	urldate = {2018-04-17},
	booktitle = {Proceedings of the 5th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {USENIX Association},
	author = {Kompella, Ramana Rao and Estan, Cristian},
	year = {2005},
	pages = {9--9},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\UAHSH4ZY\\Kompella and Estan - 2005 - The Power of Slicing in Internet Flow Measurement.pdf:application/pdf}
}

@inproceedings{molina_comparative_2008,
	title = {A {Comparative} {Experimental} {Study} of {Hash} {Functions} {Applied} to {Packet} {Sampling}},
	urldate = {2018-04-17},
	booktitle = {Proceedings of the 19th {International} {Teletraffic} {Congress} ({ITC}-19)},
	author = {Molina, M. and Niccolini, S. and Duffield, N. G.},
	year = {2005},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\JTW5XTXG\\s.html:text/html}
}

@inproceedings{zhang_fast_2003,
	address = {New York, NY, USA},
	series = {{SIGMETRICS} '03},
	title = {Fast {Accurate} {Computation} of {Large}-scale {IP} {Traffic} {Matrices} from {Link} {Loads}},
	isbn = {978-1-58113-664-7},
	doi = {10.1145/781027.781053},
	abstract = {A matrix giving the traffic volumes between origin and destination in a network has tremendous potential utility for network capacity planning and management. Unfortunately, traffic matrices are generally unavailable in large operational IP networks. On the other hand, link load measurements are readily available in IP networks. In this paper, we propose a new method for practical and rapid inference of traffic matrices in IP networks from link load measurements, augmented by readily available network and routing configuration information. We apply and validate the method by computing backbone-router to backbone-router traffic matrices on a large operational tier-1 IP network -- a problem an order of magnitude larger than any other comparable method has tackled. The results show that the method is remarkably fast and accurate, delivering the traffic matrix in under five seconds.},
	urldate = {2018-04-18},
	booktitle = {Proceedings of the 2003 {ACM} {SIGMETRICS} {International} {Conference} on {Measurement} and {Modeling} of {Computer} {Systems}},
	publisher = {ACM},
	author = {Zhang, Yin and Roughan, Matthew and Duffield, Nick and Greenberg, Albert},
	year = {2003},
	keywords = {traffic engineering, SNMP, traffic matrix estimation},
	pages = {206--217},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\WLHBZPE9\\Zhang et al. - 2003 - Fast Accurate Computation of Large-scale IP Traffi.pdf:application/pdf}
}

@inproceedings{lakhina_diagnosing_2004,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '04},
	title = {Diagnosing {Network}-wide {Traffic} {Anomalies}},
	isbn = {978-1-58113-862-7},
	doi = {10.1145/1015467.1015492},
	abstract = {Anomalies are unusual and significant changes in a network's traffic levels, which can often span multiple links. Diagnosing anomalies is critical for both network operators and end users. It is a difficult problem because one must extract and interpret anomalous patterns from large amounts of high-dimensional, noisy data. In this paper we propose a general method to diagnose anomalies. This method is based on a separation of the high-dimensional space occupied by a set of network traffic measurements into disjoint subspaces corresponding to normal and anomalous network conditions. We show that this separation can be performed effectively by Principal Component Analysis. Using only simple traffic measurements from links, we study volume anomalies and show that the method can: (1) accurately detect when a volume anomaly is occurring; (2) correctly identify the underlying origin-destination (OD) flow which is the source of the anomaly; and (3) accurately estimate the amount of traffic involved in the anomalous OD flow. We evaluate the method's ability to diagnose (i.e., detect, identify, and quantify) both existing and synthetically injected volume anomalies in real traffic from two backbone networks. Our method consistently diagnoses the largest volume anomalies, and does so with a very low false alarm rate.},
	urldate = {2018-04-18},
	booktitle = {Proceedings of the 2004 {Conference} on {Applications}, {Technologies}, {Architectures}, and {Protocols} for {Computer} {Communications}},
	publisher = {ACM},
	author = {Lakhina, Anukool and Crovella, Mark and Diot, Christophe},
	year = {2004},
	keywords = {network-wide traffic measurement, anomaly detection, network traffic analysis},
	pages = {219--230},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\L4TIDXLE\\Lakhina et al. - 2004 - Diagnosing Network-wide Traffic Anomalies.pdf:application/pdf}
}

@inproceedings{sagnol_successive_2010,
	address = {New York, NY, USA},
	series = {{SIGMETRICS} '10},
	title = {Successive {C}-optimal {Designs}: {A} {Scalable} {Technique} to {Optimize} the {Measurements} on {Large} {Networks}},
	isbn = {978-1-4503-0038-4},
	shorttitle = {Successive {C}-optimal {Designs}},
	doi = {10.1145/1811039.1811080},
	abstract = {We propose a new approach to optimize the deployment and the sampling rates of network monitoring tools, such as Netflow, on a large IP network. It reduces to solving a stochastic sequence of Second Order Cone Programs. We validate our approach with experiments relying on real data from a commercial network.},
	urldate = {2018-04-18},
	booktitle = {Proceedings of the {ACM} {SIGMETRICS} {International} {Conference} on {Measurement} and {Modeling} of {Computer} {Systems}},
	publisher = {ACM},
	author = {Sagnol, Guillaume and Bouhtou, Mustapha and Gaubert, St{\'e}phane},
	year = {2010},
	keywords = {c-optimality, netflow, optimal experimental design, SOCP},
	pages = {347--348},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\8SQDPQAB\\Sagnol et al. - 2010 - Successive C-optimal Designs A Scalable Technique.pdf:application/pdf}
}

@inproceedings{lee_relsamp:_2011,
	title = {{RelSamp}: {Preserving} application structure in sampled flow measurements},
	shorttitle = {{RelSamp}},
	doi = {10.1109/INFCOM.2011.5935054},
	abstract = {The Internet has significantly evolved in the number and variety of applications. Network operators need mechanisms to constantly monitor and study these applications. Given modern applications routinely consist of several flows, potentially to many different destinations, existing measurement approaches such as Sampled NetFlow sample only a few flows per application session. To address this issue, in this paper, we introduce RelSamp architecture that implements the notion of related sampling where flows that are part of the same application session are given higher probability. In our evaluation using real traces, we show that RelSamp achieves 5-10x more flows per application session compared to Sampled NetFlow for the same effective number of sampled packets. We also show that behavioral and statistical classification approaches such as BLINC, SVM and C4.5 achieve up to 50\% better classification accuracy compared to Sampled NetFlow, while not breaking existing management tasks such as volume estimation.},
	booktitle = {2011 {Proceedings} {IEEE} {INFOCOM}},
	author = {Lee, M. and Hajjat, M. and Kompella, R. R. and Rao, S.},
	month = apr,
	year = {2011},
	keywords = {Internet, IP networks, Monitoring, sampling, Estimation, Accuracy, application structure, Inspection, Random variables, RelSamp, statistical analysis, statistical classification},
	pages = {2354--2362},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\WVDP5BCZ\\5935054.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\UPC7QVBR\\Lee et al. - 2011 - RelSamp Preserving application structure in sampl.pdf:application/pdf}
}

% MeasuRouting (IEEE/ACM ToN 2012): framework for routing traffic subpopulations over fixed monitors within TE constraints.
@article{raza_measurouting:_2012,
	title = {{MeasuRouting}: {A} {Framework} for {Routing} {Assisted} {Traffic} {Monitoring}},
	volume = {20},
	issn = {1063-6692},
	shorttitle = {{MeasuRouting}},
	doi = {10.1109/TNET.2011.2159991},
	abstract = {Monitoring transit traffic at one or more points in a network is of interest to network operators for reasons of traffic accounting, debugging or troubleshooting, forensics, and traffic engineering. Previous research in the area has focused on deriving a placement of monitors across the network toward the end of maximizing the monitoring utility of the network operator for a given traffic routing. However, both traffic characteristics and measurement objectives can dynamically change over time, rendering a previously optimal placement of monitors suboptimal. It is not feasible to dynamically redeploy/reconfigure measurement infrastructure to cater to such evolving measurement requirements. We address this problem by strategically routing traffic subpopulations over fixed monitors. We refer to this approach as MeasuRouting. The main challenge for MeasuRouting is to work within the constraints of existing intradomain traffic engineering operations that are geared for efficiently utilizing bandwidth resources, or meeting quality-of-service (QoS) constraints, or both. A fundamental feature of intradomain routing, which makes MeasuRouting feasible, is that intradomain routing is often specified for aggregate flows. MeasuRouting can therefore differentially route components of an aggregate flow while ensuring that the aggregate placement is compliant to original traffic engineering objectives. In this paper, we present a theoretical framework for MeasuRouting. Furthermore, as proofs of concept, we present synthetic and practical monitoring applications to showcase the utility enhancement achieved with MeasuRouting.},
	number = {1},
	journal = {IEEE/ACM Transactions on Networking},
	author = {Raza, S. and Huang, G. and Chuah, C. N. and Seetharaman, S. and Singh, J. P.},
	month = feb,
	year = {2012},
	keywords = {Routing protocols, traffic engineering, Monitoring, Routing, telecommunication traffic, telecommunication network routing, Aggregates, monitoring, Anomaly detection, bandwidth resource, Gain measurement, intradomain routing, intradomain traffic engineering operation, measurement requirement, MeasuRouting, network management, network operator, optimal placement, QoS, quality of service, Quality of service, quality-of-service constraint, routing assisted traffic monitoring utility, traffic characteristics, traffic measurements, traffic routing},
	pages = {45--56},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\PRHSJHPY\\5955085.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\77QPCDAJ\\Raza et al. - 2012 - MeasuRouting A Framework for Routing Assisted Tra.pdf:application/pdf}
}

% High-cardinality host identification via two-phase filtering + thresholded bitmaps (INFOCOM 2009).
% NOTE(review): the file field lists the same Xplore abstract record (5061990.html) twice under
% different Zotero storage keys — looks like a duplicated attachment; verify in Zotero before pruning.
@inproceedings{cao_identifying_2009,
	title = {Identifying {High} {Cardinality} {Internet} {Hosts}},
	doi = {10.1109/INFCOM.2009.5061990},
	abstract = {The Internet host cardinality, defined as the number of distinct peers that an Internet host communicates with, is an important metric for profiling Internet hosts. Some example applications include behavior based network intrusion detection, p2p hosts identification, and server identification. However, due to the tremendous number of hosts in the Internet and high speed links, tracking the exact cardinality of each host is not feasible due to the limited memory and computation resource. Existing approaches on host cardinality counting have primarily focused on hosts of extremely high cardinalities. These methods do not work well with hosts of moderately large cardinalities that are needed for certain host behavior profiling such as detection of p2p hosts or port scanners. In this paper, we propose an online sampling approach for identifying hosts whose cardinality exceeds some moderate prescribed threshold, e.g. 50, or within specific ranges. The main advantage of our approach is that it can filter out the majority of low cardinality hosts while preserving the hosts of interest, and hence minimize the memory resources wasted by tracking irrelevant hosts. Our approach consists of three components: 1) two-phase filtering for eliminating low cardinality hosts, 2) thresholded bitmap for counting cardinalities, and 3) bias correction. Through both theoretical analysis and experiments using real Internet traces, we demonstrate that our approach requires much less memory than existing approaches do whereas yields more accurate estimates.},
	booktitle = {{IEEE} {INFOCOM} 2009},
	author = {Cao, J. and Jin, Y. and Chen, A. and Bu, T. and Zhang, Z. L.},
	month = apr,
	year = {2009},
	keywords = {Internet, Sampling methods, Telecommunication traffic, Application software, cardinality, Communications Society, Computer science, Filtering, Internet hosts, Intrusion detection, Network servers, online sampling approach, p2p, peer to peer, peer-to-peer computing, prescribed threshold, Statistics},
	pages = {810--818},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\9QYL3MMC\\5061990.html:text/html;IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\NZMHJ7DZ\\5061990.html:text/html}
}

% Count-Min sketch, conference version (LATIN 2004); journal version is cormode_countmin_2005.
% Added publisher and LNCS series volume (LATIN 2004 = LNCS 2976, Springer; ISBN prefix 978-3-540 is Springer's).
@inproceedings{cormode_countmin_2004,
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {An {Improved} {Data} {Stream} {Summary}: {The} {Count}-{Min} {Sketch} and {Its} {Applications}},
	volume = {2976},
	isbn = {978-3-540-21258-4 978-3-540-24698-5},
	shorttitle = {An {Improved} {Data} {Stream} {Summary}},
	doi = {10.1007/978-3-540-24698-5_7},
	abstract = {We introduce a new sublinear space data structure—the Count-Min Sketch— for summarizing data streams. Our sketch allows fundamental queries in data stream summarization such as point, range, and inner product queries to be approximately answered very quickly; in addition, it can be applied to solve several important problems in data streams such as finding quantiles, frequent items, etc. The time and space bounds we show for using the CM sketch to solve these problems significantly improve those previously known — typically from 1/ε 2 to 1/ε in factor.},
	language = {en},
	urldate = {2018-04-20},
	booktitle = {{LATIN} 2004: {Theoretical} {Informatics}},
	publisher = {Springer},
	author = {Cormode, Graham and Muthukrishnan, S.},
	month = apr,
	year = {2004},
	keywords = {sketch and streaming algorithm},
	pages = {29--38},
	file = {Cormode and Muthukrishnan - An Improved Data Stream Summary The Count-Min Ske.pdf:C\:\\Users\\zzy\\Zotero\\storage\\965NKJEF\\Cormode and Muthukrishnan - An Improved Data Stream Summary The Count-Min Ske.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\CXX37QMD\\978-3-540-24698-5_7.html:text/html;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\WHBY7ZME\\978-3-540-24698-5_7.html:text/html}
}

% DevoFlow (SIGCOMM '11): relaxes OpenFlow's coupling of control and visibility to scale flow management.
@inproceedings{curtis_devoflow:_2011,
	address = {New York, NY, USA},
	series = {{SIGCOMM} '11},
	title = {{DevoFlow}: {Scaling} {Flow} {Management} for {High}-performance {Networks}},
	isbn = {978-1-4503-0797-0},
	shorttitle = {{DevoFlow}},
	doi = {10.1145/2018436.2018466},
	abstract = {OpenFlow is a great concept, but its original design imposes excessive overheads. It can simplify network and traffic management in enterprise and data center environments, because it enables flow-level control over Ethernet switching and provides global visibility of the flows in the network. However, such fine-grained control and visibility comes with costs: the switch-implementation costs of involving the switch's control-plane too often and the distributed-system costs of involving the OpenFlow controller too frequently, both on flow setups and especially for statistics-gathering. In this paper, we analyze these overheads, and show that OpenFlow's current design cannot meet the needs of high-performance networks. We design and evaluate DevoFlow, a modification of the OpenFlow model which gently breaks the coupling between control and global visibility, in a way that maintains a useful amount of visibility without imposing unnecessary costs. We evaluate DevoFlow through simulations, and find that it can load-balance data center traffic as well as fine-grained solutions, without as much overhead: DevoFlow uses 10--53 times fewer flow table entries at an average switch, and uses 10--42 times fewer control messages.},
	urldate = {2018-04-20},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2011 {Conference}},
	publisher = {ACM},
	author = {Curtis, Andrew R. and Mogul, Jeffrey C. and Tourrilhes, Jean and Yalagandula, Praveen and Sharma, Puneet and Banerjee, Sujata},
	year = {2011},
	keywords = {data center, flow-based networking, switch design},
	pages = {254--265},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\C3IDZK3L\\Curtis et al. - 2011 - DevoFlow Scaling Flow Management for High-perform.pdf:application/pdf}
}

% HyperLogLog, the near-optimal cardinality estimator (AofA '07, DMTCS Proceedings).
% Fixes to the Zotero export: added the missing required journal field and the fourth author
% (Meunier); replaced raw Unicode accents with LaTeX escapes, matching the file's convention
% (cf. Carela-Espa\~{n}ol, Sol{\'e}-Pareta); pages = {20} was a numpages artifact — replaced
% with the proceedings page range.
@article{flajolet_hyperloglog:_2007,
	title = {{HyperLogLog}: the analysis of a near-optimal cardinality estimation algorithm},
	journal = {Discrete Mathematics and Theoretical Computer Science ({DMTCS}) Proceedings, {AofA}'07},
	language = {en},
	author = {Flajolet, Philippe and Fusy, {\'E}ric and Gandouet, Olivier and Meunier, Fr{\'e}d{\'e}ric},
	year = {2007},
	pages = {137--156},
	file = {Flajolet et al. - 2008 - HyperLogLog  the analysis of a near-optimal cardi.pdf:C\:\\Users\\zzy\\Zotero\\storage\\7Y8327RU\\Flajolet et al. - 2008 - HyperLogLog  the analysis of a near-optimal cardi.pdf:application/pdf;Flajolet et al. - HyperLogLog the analysis of a near-optimal cardin.pdf:C\:\\Users\\zzy\\Zotero\\storage\\ET8NE2GL\\Flajolet et al. - HyperLogLog the analysis of a near-optimal cardin.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\6JS4CLGA\\dmAH0110.html:text/html}
}

% HyperLogLog in Practice (EDBT '13): Google's engineering improvements to the HLL estimator.
@inproceedings{heule_hyperloglog_2013,
	address = {New York, NY, USA},
	series = {{EDBT} '13},
	title = {{HyperLogLog} in {Practice}: {Algorithmic} {Engineering} of a {State} of the {Art} {Cardinality} {Estimation} {Algorithm}},
	isbn = {978-1-4503-1597-5},
	shorttitle = {{HyperLogLog} in {Practice}},
	doi = {10.1145/2452376.2452456},
	abstract = {Cardinality estimation has a wide range of applications and is of particular importance in database systems. Various algorithms have been proposed in the past, and the HyperLogLog algorithm is one of them. In this paper, we present a series of improvements to this algorithm that reduce its memory requirements and significantly increase its accuracy for an important range of cardinalities. We have implemented our proposed algorithm for a system at Google and evaluated it empirically, comparing it to the original HyperLogLog algorithm. Like HyperLogLog, our improved algorithm parallelizes perfectly and computes the cardinality estimate in a single pass.},
	urldate = {2018-04-20},
	booktitle = {Proceedings of the 16th {International} {Conference} on {Extending} {Database} {Technology}},
	publisher = {ACM},
	author = {Heule, Stefan and Nunkesser, Marc and Hall, Alexander},
	year = {2013},
	pages = {683--692},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\XCMSGR5M\\Heule et al. - 2013 - HyperLogLog in Practice Algorithmic Engineering o.pdf:application/pdf}
}

% Lossy counter-array streaming algorithm with EM inference for flow size distribution (SIGMETRICS/Performance '04).
@inproceedings{kumar_data_2004,
	address = {New York, NY, USA},
	series = {{SIGMETRICS} '04/{Performance} '04},
	title = {Data {Streaming} {Algorithms} for {Efficient} and {Accurate} {Estimation} of {Flow} {Size} {Distribution}},
	isbn = {978-1-58113-873-3},
	doi = {10.1145/1005686.1005709},
	abstract = {Knowing the distribution of the sizes of traffic flows passing through a network link helps a network operator to characterize network resource usage, infer traffic demands, detect traffic anomalies, and accommodate new traffic demands through better traffic engineering. Previous work on estimating the flow size distribution has been focused on making inferences from sampled network traffic. Its accuracy is limited by the (typically) low sampling rate required to make the sampling operation affordable. In this paper we present a novel data streaming algorithm to provide much more accurate estimates of flow distribution, using a "lossy data structure" which consists of an array of counters fitted well into SRAM. For each incoming packet, our algorithm only needs to increment one underlying counter, making the algorithm fast enough even for 40 Gbps (OC-768) links. The data structure is lossy in the sense that sizes of multiple flows may collide into the same counter. Our algorithm uses Bayesian statistical methods such as Expectation Maximization to infer the most likely flow size distribution that results in the observed counter values after collision. Evaluations of this algorithm on large Internet traces obtained from several sources (including a tier-1 ISP) demonstrate that it has very high measurement accuracy (within 2\%). Our algorithm not only dramatically improves the accuracy of flow distribution measurement, but also contributes to the field of data streaming by formalizing an existing methodology and applying it to the context of estimating the flow-distribution.},
	urldate = {2018-04-20},
	booktitle = {Proceedings of the {Joint} {International} {Conference} on {Measurement} and {Modeling} of {Computer} {Systems}},
	publisher = {ACM},
	author = {Kumar, Abhishek and Sung, Minho and Xu, Jun (Jim) and Wang, Jia},
	year = {2004},
	keywords = {network measurement, traffic analysis, statistical inference, sketch and streaming algorithm},
	pages = {177--188},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\AU5KAWT7\\Kumar et al. - 2004 - Data Streaming Algorithms for Efficient and Accura.pdf:application/pdf}
}

% lsquare (IMC '05): linear least squares post-processing to improve sketch reconstruction accuracy.
% NOTE(review): pages = {24--24} is an ACM-export artifact (single number duplicated as a range);
% likely should be {24} or the paper's true page range — verify against the proceedings.
@inproceedings{lee_improving_2005,
	address = {Berkeley, CA, USA},
	series = {{IMC} '05},
	title = {Improving {Sketch} {Reconstruction} {Accuracy} {Using} {Linear} {Least} {Squares} {Method}},
	abstract = {Sketch is a sublinear space data structure that allows one to approximately reconstruct the value associated with any given key in an input data stream. It is the basis for answering a number of fundamental queries on data streams, such as range queries, finding quantiles, frequent items, etc. In the networking context, sketch has been applied to identifying heavy hitters and changes, which is critical for traffic monitoring, accounting, and network anomaly detection. In this paper, we propose a novel approach called lsquare to significantly improve the reconstruction accuracy of the sketch data structure. Given a sketch and a set of keys, we estimate the values associated with these keys by constructing a linear system and finding the optimal solution for the system using linear least squares method. We use a large amount of real Internet traffic data to evaluate lsquare against countmin, the state-of-the-art sketch scheme. Our results suggest that given the same memory requirement, lsquare achieves much better reconstruction accuracy than countmin. Alternatively, given the same reconstruction accuracy, lsquare requires significantly less memory. This clearly demonstrates the effectiveness of our approach.},
	urldate = {2018-04-20},
	booktitle = {Proceedings of the 5th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {USENIX Association},
	author = {Lee, Gene Moo and Liu, Huiya and Yoon, Young and Zhang, Yin},
	year = {2005},
	keywords = {sketch and streaming algorithm},
	pages = {24--24},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\XQBQMPRN\\Lee et al. - 2005 - Improving Sketch Reconstruction Accuracy Using Lin.pdf:application/pdf}
}

% Counter Braids (SIGMETRICS '08): compressed counter architecture for per-flow measurement.
@inproceedings{lu_counter_2008,
	address = {New York, NY, USA},
	series = {{SIGMETRICS} '08},
	title = {Counter {Braids}: {A} {Novel} {Counter} {Architecture} for {Per}-flow {Measurement}},
	isbn = {978-1-60558-005-0},
	shorttitle = {Counter {Braids}},
	doi = {10.1145/1375457.1375472},
	abstract = {Fine-grained network measurement requires routers and switches to update large arrays of counters at very high link speed (e.g. 40 Gbps). A naive algorithm needs an infeasible amount of SRAM to store both the counters and a flow-to-counter association rule, so that arriving packets can update corresponding counters at link speed. This has made accurate per-flow measurement complex and expensive, and motivated approximate methods that detect and measure only the large flows. This paper revisits the problem of accurate per-flow measurement. We present a counter architecture, called Counter Braids, inspired by sparse random graph codes. In a nutshell, Counter Braids "compresses while counting". It solves the central problems (counter space and flow-to-counter association) of per-flow measurement by "braiding" a hierarchy of counters with random graphs. Braiding results in drastic space reduction by sharing counters among flows; and using random graphs generated on-the-fly with hash functions avoids the storage of flow-to-counter association. The Counter Braids architecture is optimal (albeit with a complex decoder) as it achieves the maximum compression rate asymptotically. For implementation, we present a low-complexity message passing decoding algorithm, which can recover flow sizes with essentially zero error. Evaluation on Internet traces demonstrates that almost all flow sizes are recovered exactly with only a few bits of counter space per flow.},
	urldate = {2018-04-20},
	booktitle = {Proceedings of the 2008 {ACM} {SIGMETRICS} {International} {Conference} on {Measurement} and {Modeling} of {Computer} {Systems}},
	publisher = {ACM},
	author = {Lu, Yi and Montanari, Andrea and Prabhakar, Balaji and Dharmapurikar, Sarang and Kabbani, Abdul},
	year = {2008},
	keywords = {network measurement, message passing algorithms, statistic counters},
	pages = {121--132},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\WP9V7LNN\\Lu et al. - 2008 - Counter Braids A Novel Counter Architecture for P.pdf:application/pdf}
}

% Superspreader detection via streaming algorithms (DTIC tech report version).
% Fixes to the export: removed the stray trailing colon in title (shorttitle shows the intended
% form) and repaired the mis-encoded "fi" ligature characters in the abstract.
@techreport{venkataraman_new_2004,
	address = {Fort Belvoir, VA},
	title = {New {Streaming} {Algorithms} for {Fast} {Detection} of {Superspreaders}},
	shorttitle = {New {Streaming} {Algorithms} for {Fast} {Detection} of {Superspreaders}},
	abstract = {High-speed monitoring of Internet traffic is an important and challenging problem, with applications to realtime attack detection and mitigation, traffic engineering, etc. However, packet-level monitoring requires fast streaming algorithms that use very little memory and little communication among collaborating network monitoring points.},
	language = {en},
	urldate = {2018-04-20},
	institution = {Defense Technical Information Center},
	author = {Venkataraman, Shobha and Song, Dawn and Gibbons, Phillip B. and Blum, Avrim},
	month = may,
	year = {2004},
	doi = {10.21236/ADA461026},
	file = {Venkataraman et al. - 2004 - New Streaming Algorithms for Fast Detection of Sup.pdf:C\:\\Users\\zzy\\Zotero\\storage\\TZ44MFH8\\Venkataraman et al. - 2004 - New Streaming Algorithms for Fast Detection of Sup.pdf:application/pdf}
}

% Adaptive flow counting for anomaly detection in SDN (CoNEXT '13); single-author entry.
@inproceedings{zhang_adaptive_2013,
	address = {New York, NY, USA},
	series = {{CoNEXT} '13},
	title = {An {Adaptive} {Flow} {Counting} {Method} for {Anomaly} {Detection} in {SDN}},
	isbn = {978-1-4503-2101-3},
	doi = {10.1145/2535372.2535411},
	abstract = {The accuracy and granularity of network flow measurement play a critical role in many network management tasks, especially for anomaly detection. Despite its important, traffic monitoring often introduces overhead to the network, thus, operators have to employ sampling and aggregation to avoid overloading the infrastructure. However, such sampled and aggregated information may affect the accuracy of traffic anomaly detection. In this work, we propose a novel method that performs adaptive zooming in the aggregation of flows to be measured. In order to better balance the monitoring overhead and the anomaly detection accuracy, we propose a prediction based algorithm that dynamically change the granularity of measurement along both the spatial and the temporal dimensions. To control the load on each individual switch, we carefully delegate monitoring rules in the network wide. Using real-world data and three simple anomaly detectors, we show that the adaptive based counting can detect anomalies more accurately with less overhead.},
	urldate = {2018-04-20},
	booktitle = {Proceedings of the {Ninth} {ACM} {Conference} on {Emerging} {Networking} {Experiments} and {Technologies}},
	publisher = {ACM},
	author = {Zhang, Ying},
	year = {2013},
	keywords = {software-defined networking, network measurement},
	pages = {25--30},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\7BHJQ4TT\\Zhang - 2013 - An Adaptive Flow Counting Method for Anomaly Detec.pdf:application/pdf}
}

% MMPR (IEEE TNSM 2012): joint monitor placement and routing via MILP with heuristic approximations.
@article{huang_measurement-aware_2012,
	title = {Measurement-{Aware} {Monitor} {Placement} and {Routing}: {A} {Joint} {Optimization} {Approach} for {Network}-{Wide} {Measurements}},
	volume = {9},
	issn = {1932-4537},
	shorttitle = {Measurement-{Aware} {Monitor} {Placement} and {Routing}},
	doi = {10.1109/TNSM.2012.010912.110128},
	abstract = {Network-wide traffic measurement is important for various network management tasks, ranging from traffic accounting, traffic engineering, network troubleshooting to security. Previous research in this area has focused on either deriving better monitor placement strategies for fixed routing, or strategically routing traffic sub-populations over existing deployed monitors to maximize the measurement gain. However, neither of them alone suffices in real scenarios, since not only the number of deployed monitors is limited, but also the traffic characteristics and measurement objectives are constantly changing. This paper presents an MMPR (Measurement-aware Monitor Placement and Routing) framework that jointly optimizes monitor placement and dynamic routing strategy to achieve maximum measurement utility. The main challenge in solving MMPR is to decouple the relevant decision variables and adhere to the intra-domain traffic engineering constraints. We formulate it as an MILP (Mixed Integer Linear Programming) problem and propose several heuristic algorithms to approximate the optimal solution and reduce the computation complexity. Through experiments using real traces and topologies (Abilene , AS6461 , and GEANT ), we show that our heuristic solutions can achieve measurement gains that are quite close to the optimal solutions, while reducing the computation times by a factor of 23X in Abilene (small), 246X in AS6461 (medium), and 233X in GEANT (large), respectively.},
	number = {1},
	journal = {IEEE Transactions on Network and Service Management},
	author = {Huang, G. and Chang, C. W. and Chuah, C. N. and Lin, B.},
	month = mar,
	year = {2012},
	keywords = {traffic engineering, GEANT, integer programming, linear programming, MILP, Monitoring, network-wide traffic measurement, Optimization, Routing, telecommunication traffic, computational complexity, telecommunication network routing, Gain measurement, AS6461, computation complexity, Current measurement, dynamic routing strategy, fixed routing, intradomain traffic engineering constraints, joint optimization approach, Mathematical model, measurement-aware monitor placement and routing framework, mixed integer linear programming problem, MMPR, routing, traffic accounting, Traffic measurement},
	pages = {48--59},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\AP9QW5J6\\6128762.html:text/html}
}

% Journal version (J. Algorithms 2005) of the Count-Min sketch; conference version is cormode_countmin_2004.
% Not a duplicate: distinct publication despite the shared title.
@article{cormode_countmin_2005,
	title = {An improved data stream summary: the count-min sketch and its applications},
	volume = {55},
	issn = {0196-6774},
	shorttitle = {An improved data stream summary},
	doi = {10.1016/j.jalgor.2003.12.001},
	abstract = {We introduce a new sublinear space data structure—the count-min sketch—for summarizing data streams. Our sketch allows fundamental queries in data stream summarization such as point, range, and inner product queries to be approximately answered very quickly; in addition, it can be applied to solve several important problems in data streams such as finding quantiles, frequent items, etc. The time and space bounds we show for using the CM sketch to solve these problems significantly improve those previously known—typically from 1/ε2 to 1/ε in factor.},
	number = {1},
	urldate = {2018-04-22},
	journal = {Journal of Algorithms},
	author = {Cormode, Graham and Muthukrishnan, S.},
	month = apr,
	year = {2005},
	keywords = {formulation intensive, sketch and streaming algorithm},
	pages = {58--75},
	file = {ScienceDirect Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\7BB3WYZM\\Cormode and Muthukrishnan - 2005 - An improved data stream summary the count-min ske.pdf:application/pdf;ScienceDirect Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\2477T6KS\\S0196677403001913.html:text/html}
}

% Inverting sampled traffic (IMC '03): recovering spectral density and flow-size distribution from thinned traces.
% Added address/series/publisher for consistency with estan_bitmap_2003, which is in the same
% proceedings (identical booktitle and ISBN 978-1-58113-773-6).
@inproceedings{hohn_inverting_2003,
	address = {New York, NY, USA},
	series = {{IMC} '03},
	title = {Inverting {Sampled} {Traffic}},
	isbn = {978-1-58113-773-6},
	doi = {10.1145/948205.948235},
	abstract = {Routers have the ability to output statistics about packets and flows of packets that traverse them. Since however the generation of detailed traffic statistics does not scale well with link speed, increasingly routers and measurement boxes implement sampling strategies at the packet level. In this paper we study both theoretically and practically what information about the original traffic can be inferred when sampling, or `thinning', is performed at the packet level. While basic packet level characteristics such as first order statistics can be fairly directly recovered, other aspects require more attention. We focus mainly on the spectral density, a second order statistic, and the distribution of the number of packets per flow, showing how both can be exactly recovered, in theory. We then show in detail why in practice this cannot be done using the traditional packet based sampling, even for high sampling rate. We introduce an alternative flow based thinning, where practical inversion is possible even at arbitrarily low sampling rate. We also investigate the theory and practice of fitting the parameters of a Poisson cluster process, modelling the full packet traffic, from sampled data.},
	urldate = {2018-04-22},
	booktitle = {Proceedings of the 3rd {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Hohn, Nicolas and Veitch, Darryl},
	year = {2003},
	keywords = {sampling, internet data, long range dependence, Poisson cluster process, TCP flows, thinning, traffic modeling, transform inversion},
	pages = {222--233},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\M9A7IG4E\\Hohn and Veitch - 2003 - Inverting Sampled Traffic.pdf:application/pdf}
}

% Bitmap algorithms for counting distinct active flows at line rate (IMC '03).
@inproceedings{estan_bitmap_2003,
	address = {New York, NY, USA},
	series = {{IMC} '03},
	title = {Bitmap {Algorithms} for {Counting} {Active} {Flows} on {High} {Speed} {Links}},
	isbn = {978-1-58113-773-6},
	doi = {10.1145/948205.948225},
	abstract = {This paper presents a family of bitmap algorithms that address the problem of counting the number of distinct header patterns (flows) seen on a high speed link. Such counting can be used to detect DoS attacks and port scans, and to solve measurement problems. Counting is especially hard when processing must be done within a packet arrival time (8 nsec at OC-768 speeds) and, hence, must require only a small number of accesses to limited, fast memory. A naive solution that maintains a hash table requires several Mbytes because the number of flows can be above a million. By contrast, our new probabilistic algorithms take very little memory and are fast. The reduction in memory is particularly important for applications that run multiple concurrent counting instances. For example, we replaced the port scan detection component of the popular intrusion detection system Snort with one of our new algorithms. This reduced memory usage on a ten minute trace from 50 Mbytes to 5.6 Mbytes while maintaining a 99.77\% probability of alarming on a scan within 6 seconds of when the large-memory algorithm would. The best known prior algorithm (probabilistic counting) takes 4 times more memory on port scan detection and 8 times more on a measurement application. Fundamentally, this is because our algorithms can be customized to take advantage of special features of applications such as a large number of instances that have very small counts or prior knowledge of the likely range of the count.},
	urldate = {2018-04-22},
	booktitle = {Proceedings of the 3rd {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Estan, Cristian and Varghese, George and Fisk, Mike},
	year = {2003},
	keywords = {network traffic measurement, counting flows},
	pages = {153--166},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\V9QR34IN\\Estan et al. - 2003 - Bitmap Algorithms for Counting Active Flows on Hig.pdf:application/pdf}
}

% LR(T) counter-management algorithm for SRAM/DRAM statistics counters (SIGMETRICS '03).
@inproceedings{ramabhadran_efficient_2003,
	address = {New York, NY, USA},
	series = {{SIGMETRICS} '03},
	title = {Efficient {Implementation} of a {Statistics} {Counter} {Architecture}},
	isbn = {978-1-58113-664-7},
	doi = {10.1145/781027.781060},
	abstract = {Internet routers and switches need to maintain millions of (e.g., per prefix) counters at up to OC-768 speeds that are essential for traffic engineering. Unfortunately, the speed requirements require the use of large amounts of expensive SRAM memory. Shah et al [1]introduced a cheaper statistics counter architecture that uses a much smaller amount of SRAM by using the SRAM as a cache together with a (cheap) backing DRAM that stores the complete counters. Counters in SRAM are periodically updated to the DRAM before they overflow under the control of a counter management algorithm. Shah et al [1] also devised a counter management algorithm called LCF that they prove uses an optimal amount of SRAM. Unfortunately, it is difficult to implement LCF at high speeds because it requires sorting to evict the largest counter in the SRAM. This paper removes this bottleneck in [1] by proposing a counter management algorithm called LR(T) (Largest Recent with thresh-old T) that avoids sorting by only keeping a bitmap that tracks counters that are larger than threshold T. This allows LR(T) to be practically realizable using only at most 2 bits extra per counter and a simple pipelined data structure. Despite this, we show through a formal analysis, that for a particular value of the threshold T, the LR(T) requires an optimal amount of SRAM, matching LCF. Further,we also describe an implementation, based on a novel data structure called aggregated bitmap, that allows the LR(T) algorithm to be realized at line rates.},
	urldate = {2018-04-22},
	booktitle = {Proceedings of the 2003 {ACM} {SIGMETRICS} {International} {Conference} on {Measurement} and {Modeling} of {Computer} {Systems}},
	publisher = {ACM},
	author = {Ramabhadran, Sriram and Varghese, George},
	year = {2003},
	keywords = {router, statistics counter},
	pages = {261--271},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\FFE5TW3M\\Ramabhadran and Varghese - 2003 - Efficient Implementation of a Statistics Counter A.pdf:application/pdf}
}

@article{ramakrishna_efficient_1997,
	title = {Efficient hardware hashing functions for high performance computers},
	volume = {46},
	issn = {0018-9340},
	doi = {10.1109/12.641938},
	abstract = {Hashing is critical for high performance computer architecture. Hashing is used extensively in hardware applications, such as page tables, for address translation. Bit extraction and exclusive ORing hashing “methods” are two commonly used hashing functions for hardware applications. There is no study of the performance of these functions and no mention anywhere of the practical performance of the hashing functions in comparison with the theoretical performance prediction of hashing schemes. In this paper, we show that, by choosing hashing functions at random from a particular class, called H3, of hashing functions, the analytical performance of hashing can be achieved in practice on real-life data. Our results about the expected worst case performance of hashing are of special significance, as they provide evidence for earlier theoretical predictions},
	number = {12},
	journal = {IEEE Transactions on Computers},
	author = {Ramakrishna, M. V. and Fu, E. and Bahcekapili, E.},
	month = dec,
	year = {1997},
	keywords = {Hardware, performance, computational complexity, Application software, analytical performance, Circuits, computer architecture, Computer architecture, file organisation, hashing functions, high performance computers, High performance computing, Information retrieval, Organizing, Performance analysis, performance evaluation, Probes, Software performance, worst case performance},
	pages = {1378--1381},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\UEHHHHTQ\\641938.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\YVKAVHT7\\Ramakrishna et al. - 1997 - Efficient hardware hashing functions for high perf.pdf:application/pdf}
}

@inproceedings{kumar_space-code_2004,
	series = {{INFOCOM}'04},
	title = {Space-{Code} {Bloom} {Filter} for {Efficient} {Per}-{Flow} {Traffic} {Measurement}},
	doi = {10.1109/INFCOM.2004.1354587},
	abstract = {Per-flow traffic measurement is critical for usage accounting, traffic engineering, and anomaly detection. Previous methodologies are either based on random sampling (e.g., Cisco's NetFlow), which is inaccurate, or only account for the "elephants". We introduce a novel technique for measuring per-flow traffic approximately, for all flows regardless of their sizes, at very high-speed (say, OC768). The core of this technique is a novel data structure called space code bloom filter (SCBF). A SCBF is an approximate representation of a multiset; each element in this multiset is a traffic flow and its multiplicity is the number of packets in the flow. The multiplicity of an element in the multiset represented by SCBF can be estimated through either of two mechanisms-maximum likelihood estimation (MLE) or mean value estimation (MVE). Through parameter tuning, SCBF allows for graceful tradeoff between measurement accuracy and computational and storage complexity. SCBF also contributes to the foundation of data streaming by introducing a new paradigm called blind streaming. We evaluate the performance of SCBF through mathematical analysis and through experiments on packet traces gathered from a tier-1 ISP backbone. Our results demonstrate that SCBF achieves reasonable measurement accuracy with very low storage and computational complexity},
	booktitle = {Proceedings of the 23rd {Conference} of the {IEEE} {Communications} {Society}},
	author = {Kumar, Abhishek and Xu, Jun and Wang, Jia and Spatscheck, Oliver and Li, Li},
	month = mar,
	year = {2004},
	keywords = {maximum likelihood estimation, Internet, telecommunication traffic, Maximum likelihood estimation, sampling, Spine, Telecommunication traffic, Fluid flow measurement, sketch and streaming algorithm, blind streaming, data structure, data structures, mean value estimation, per-flow traffic measurement, tier-1 ISP backbone, Data structures, filtering theory, Filters, multiset, parameter tuning, Random access memory, Space technology, space-code bloom filter, State estimation, bloom filter},
	pages = {1762--1773},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\P54JJIH6\\1354587.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\CJMEZMU6\\Kumar et al. - 2004 - Space-code bloom filter for efficient per-flow tra.pdf:application/pdf}
}

@inproceedings{duffield_flow_2004,
	series = {{SIGMETRICS} '04/{Performance} '04},
	title = {Flow {Sampling} {Under} {Hard} {Resource} {Constraints}},
	isbn = {978-1-58113-873-3},
	doi = {10.1145/1005686.1005699},
	abstract = {Many network management applications use as their data traffic volumes differentiated by attributes such as IP address or port number. IP flow records are commonly collected for this purpose: these enable determination of fine-grained usage of network resources. However, the increasingly large volumes of flow statistics incur concomitant costs in the resources of the measurement infrastructure. This motivates sampling of flow records.This paper addresses sampling strategy for flow records. Recent work has shown that non-uniform sampling is necessary in order to control estimation variance arising from the observed heavy-tailed distribution of flow lengths. However, while this approach controls estimator variance, it does not place hard limits on the number of flows sampled. Such limits are often required during arbitrary downstream sampling, resampling and aggregation operations employed in analysis of the data.This paper proposes a correlated sampling strategy that is able to select an arbitrarily small number of the "best" representatives of a set of flows. We show that usage estimates arising from such selection are unbiased, and show how to estimate their variance, both offline for modeling purposes, and online during the sampling itself. The selection algorithm can be implemented in a queue-like data structure in which memory usage is uniformly bounded during measurement. Finally, we compare the complexity and performance of our scheme with other potential approaches.},
	urldate = {2018-04-23},
	booktitle = {Proceedings of the {Joint} {International} {Conference} on {Measurement} and {Modeling} of {Computer} {Systems}},
	publisher = {ACM},
	address = {New York, NY, USA},
	author = {Duffield, Nick and Lund, Carsten and Thorup, Mikkel},
	year = {2004},
	keywords = {IP flows, sampling, variance reduction},
	pages = {85--96},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\V99P5ZRK\\Duffield et al. - 2004 - Flow Sampling Under Hard Resource Constraints.pdf:application/pdf}
}

@article{qiao_fast_2014,
	title = {Fast {Bloom} {Filters} and {Their} {Generalization}},
	volume = {25},
	issn = {1045-9219},
	doi = {10.1109/TPDS.2013.46},
	abstract = {Bloom filters have been extensively applied in many network functions. Their performance is judged by three criteria: query overhead, space requirement, and false positive ratio. Due to wide applicability, any improvement to the performance of Bloom filters can potentially have a broad impact in many areas of networking research. In this paper, we study Bloom-1, a data structure that performs membership check in one memory access, which compares favorably with the k memory accesses of a standard Bloom filter. We also generalize Bloom-1 to Bloom-g and Bloom-Q, allowing performance tradeoff between membership query overhead and false positive ratio. We thoroughly examine the variants in this family of filters, and show that they can be configured to outperform the Bloom filters with a smaller number of memory accesses, a smaller or equal number of hash bits, and a smaller or comparable false positive ratio in practical scenarios. We also perform experiments based on a real traffic trace to support our filter design.},
	number = {1},
	journal = {IEEE Transactions on Parallel and Distributed Systems},
	author = {Qiao, Yan and Li, Tao and Chen, Shigang},
	month = jan,
	year = {2014},
	keywords = {Hardware, Throughput, data structures, Random access memory, Arrays, Bloom-1 data structure, Bloom-g, Bloom-Q, false positive, false positive ratio, fast Bloom filters, filter design, generalization, hash requirement, Information filtering, membership check, memory access, Memory management, network functions, networking research, query overhead, space requirement, traffic trace, bloom filter},
	pages = {93--103},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\J85TD69H\\6464259.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\3MA6ZJN6\\Qiao et al. - 2014 - Fast Bloom Filters and Their Generalization.pdf:application/pdf}
}

@article{rottenstreich_bloom_2015,
	title = {The {Bloom} {Paradox}: {When} {Not} to {Use} a {Bloom} {Filter}},
	volume = {23},
	issn = {1063-6692},
	shorttitle = {The {Bloom} {Paradox}},
	doi = {10.1109/TNET.2014.2306060},
	abstract = {In this paper, we uncover the Bloom paradox in Bloom Filters: Sometimes, the Bloom Filter is harmful and should not be queried. We first analyze conditions under which the Bloom paradox occurs in a Bloom Filter and demonstrate that it depends on the a priori probability that a given element belongs to the represented set. We show that the Bloom paradox also applies to Counting Bloom Filters (CBFs) and depends on the product of the hashed counters of each element. In addition, we further suggest improved architectures that deal with the Bloom paradox in Bloom Filters, CBFs, and their variants. We further present an application of the presented theory in cache sharing among Web proxies. Lastly, using simulations, we verify our theoretical results and show that our improved schemes can lead to a large improvement in the performance of Bloom Filters and CBFs.},
	number = {3},
	urldate = {2018-04-23},
	journal = {IEEE/ACM Transactions on Networking},
	author = {Rottenstreich, Ori and Keslassy, Isaac},
	month = jun,
	year = {2015},
	keywords = {a priori membership probability, counting Bloom filter, the Bloom filter paradox, bloom filter},
	pages = {703--716},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\2ZYNEYMF\\Rottenstreich and Keslassy - 2015 - The Bloom Paradox When Not to Use a Bloom Filter.pdf:application/pdf}
}

@book{varghese_network_2005,
	address = {Amsterdam; Boston},
	series = {The {Morgan} {Kaufmann} series in networking},
	title = {Network algorithmics: an interdisciplinary approach to designing fast networked devices},
	isbn = {978-0-12-088477-3},
	shorttitle = {Network algorithmics},
	language = {en},
	publisher = {Elsevier/Morgan Kaufmann},
	author = {Varghese, George},
	year = {2005},
	note = {OCLC: ocm56912632},
	keywords = {计算机技术, 计算机系统, 算法, 网络, 网络设备, algorithm, Networking, Programming, Computer network protocols},
	file = {Network Algorithmics An Interdisciplinary Approach to Designing Fast Networked Devices.pdf:C\:\\Users\\zzy\\Zotero\\storage\\P94UTG7G\\Network Algorithmics An Interdisciplinary Approach to Designing Fast Networked Devices.pdf:application/pdf}
}

@inproceedings{kumar_data_2005,
	address = {New York, NY, USA},
	series = {{SIGMETRICS} '05},
	title = {A {Data} {Streaming} {Algorithm} for {Estimating} {Subpopulation} {Flow} {Size} {Distribution}},
	isbn = {978-1-59593-022-4},
	doi = {10.1145/1064212.1064221},
	abstract = {Statistical information about the flow sizes in the traffic passing through a network link helps a network operator to characterize network resource usage, infer traffic demands, detect traffic anomalies, and improve network performance through traffic engineering. Previous work on estimating the flow size distribution for the complete population of flows has produced techniques that either make inferences from sampled network traffic, or use data streaming approaches. In this work, we identify and solve a more challenging problem of estimating the size distribution and other statistical information about arbitrary subpopulations of flows. Inferring subpopulation flow statistics is more challenging than the complete population counterpart, since subpopulations of interest are often specified a posteriori (i.e., after the data collection is done), making it impossible for the data collection module to "plan in advance".Our solution consists of a novel mechanism that combines data streaming with traditional packet sampling to provide highly accurate estimates of subpopulation flow statistics. The algorithm employs two data collection modules operating in parallel --- a NetFlow-like packet sampler and a streaming data structure made up of an array of counters. Combining the data collected by these two modules, our estimation algorithm uses a statistical estimation procedure that correlates and decodes the outputs (observations) from both data collection modules to obtain flow statistics for any arbitrary subpopulation. Evaluations of this algorithm on real-world Internet traffic traces demonstrate its high measurement accuracy.},
	urldate = {2018-04-24},
	booktitle = {Proceedings of the 2005 {ACM} {SIGMETRICS} {International} {Conference} on {Measurement} and {Modeling} of {Computer} {Systems}},
	publisher = {ACM},
	author = {Kumar, Abhishek and Sung, Minho and Xu, Jun (Jim) and Zegura, Ellen W.},
	year = {2005},
	keywords = {traffic analysis, statistical inference, data streaming, EM algorithm, flow statistics},
	pages = {61--72},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\IBSX6MDT\\Kumar et al. - 2005 - A Data Streaming Algorithm for Estimating Subpopul.pdf:application/pdf}
}

@inproceedings{brutlag_aberrant_2000,
	address = {Berkeley, CA, USA},
	title = {Aberrant {Behavior} {Detection} in {Time} {Series} for {Network} {Monitoring}},
	abstract = {The open-source software RRDtool and Cricket provide a solution to the problem of collecting, storing, and visualizing service network time series data for the real-time monitoring task. However, simultaneously monitoring all service network time series of interest is an impossible task even for the accomplished network technician. The solution is to integrate a mathematical model for automatic aberrant behavior detection in time series into the monitoring software. While there are many such models one might choose, the primary goal should be a model compatible with real-time monitoring. At WebTV, the solution was to integrate a model based on exponential smoothing and Holt-Winters forecasting into the Cricket/RRDtool architecture. While perhaps not optimal, this solution is flexible, efficient, and effective as a tool for automatic aberrant behavior detection.},
	urldate = {2018-04-24},
	booktitle = {Proceedings of the 14th {USENIX} {Conference} on {System} {Administration}},
	publisher = {USENIX Association},
	author = {Brutlag, Jake D.},
	year = {2000},
	pages = {139--146},
	file = {Brutlag - 2000 - Aberrant Behavior Detection in Time Series for Net.pdf:C\:\\Users\\zzy\\Zotero\\storage\\I2DGYHRI\\Brutlag - 2000 - Aberrant Behavior Detection in Time Series for Net.pdf:application/pdf}
}

@incollection{motwani_algorithms_2010,
	title = {Randomized {Algorithms}},
	booktitle = {Algorithms and {Theory} of {Computation} {Handbook}},
	isbn = {978-1-58488-822-2},
	url = {http://dl.acm.org/citation.cfm?id=1882757.1882769},
	urldate = {2018-04-26},
	publisher = {Chapman \& Hall/CRC},
	author = {Motwani, Rajeev and Raghavan, Prabhakar},
	editor = {Atallah, Mikhail J. and Blanton, Marina},
	year = {2010},
	pages = {12--12},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\6U8JZDXX\\Motwani and Raghavan - 2010 - Algorithms and Theory of Computation Handbook.pdf:application/pdf}
}

@article{karp_simple_2003,
	title = {A {Simple} {Algorithm} for {Finding} {Frequent} {Elements} in {Streams} and {Bags}},
	volume = {28},
	issn = {0362-5915},
	doi = {10.1145/762471.762473},
	abstract = {We present a simple, exact algorithm for identifying in a multiset the items with frequency more than a threshold θ. The algorithm requires two passes, linear time, and space 1/θ. The first pass is an on-line algorithm, generalizing a well-known algorithm for finding a majority element, for identifying a set of at most 1/θ items that includes, possibly among others, all items with frequency greater than θ.},
	number = {1},
	urldate = {2018-04-26},
	journal = {ACM Trans. Database Syst.},
	author = {Karp, Richard M. and Shenker, Scott and Papadimitriou, Christos H.},
	month = mar,
	year = {2003},
	keywords = {Data stream, frequent elements},
	pages = {51--55},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\NYEYEVHP\\Karp et al. - 2003 - A Simple Algorithm for Finding Frequent Elements i.pdf:application/pdf}
}

@article{bloom_space/time_1970,
	title = {Space/{Time} {Trade}-offs in {Hash} {Coding} with {Allowable} {Errors}},
	volume = {13},
	issn = {0001-0782},
	doi = {10.1145/362686.362692},
	abstract = {In this paper trade-offs among certain computational factors in hash coding are analyzed. The paradigm problem considered is that of testing a series of messages one-by-one for membership in a given set of messages. Two new hash-coding methods are examined and compared with a particular conventional hash-coding method. The computational factors considered are the size of the hash area (space), the time required to identify a message as a nonmember of the given set (reject time), and an allowable error frequency.
The new methods are intended to reduce the amount of space required to contain the hash-coded information from that associated with conventional methods. The reduction in space is accomplished by exploiting the possibility that a small fraction of errors of commission may be tolerable in some applications, in particular, applications in which a large amount of data is involved and a core resident hash area is consequently not feasible using conventional methods.
In such applications, it is envisaged that overall performance could be improved by using a smaller core resident hash area in conjunction with the new methods and, when necessary, by using some secondary and perhaps time-consuming test to “catch” the small fraction of errors associated with the new methods. An example is discussed which illustrates possible areas of application for the new methods.
Analysis of the paradigm problem demonstrates that allowing a small number of test messages to be falsely identified as members of the given set will permit a much smaller hash area to be used without increasing reject time.},
	number = {7},
	urldate = {2018-04-26},
	journal = {Commun. ACM},
	author = {Bloom, Burton H.},
	month = jul,
	year = {1970},
	keywords = {bloom filter, hash addressing, hash coding, retrieval efficiency, retrieval trade-offs, scatter storage, searching, storage efficiency, storage layout},
	pages = {422--426},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\CUMYPX3N\\Bloom - 1970 - SpaceTime Trade-offs in Hash Coding with Allowabl.pdf:application/pdf}
}

@inproceedings{alon_space_1996,
	address = {New York, NY, USA},
	series = {{STOC} '96},
	title = {The {Space} {Complexity} of {Approximating} the {Frequency} {Moments}},
	isbn = {978-0-89791-785-8},
	doi = {10.1145/237814.237823},
	urldate = {2018-04-26},
	booktitle = {Proceedings of the {Twenty}-eighth {Annual} {ACM} {Symposium} on {Theory} of {Computing}},
	publisher = {ACM},
	author = {Alon, Noga and Matias, Yossi and Szegedy, Mario},
	year = {1996},
	pages = {20--29},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\N34YE9WS\\Alon et al. - 1996 - The Space Complexity of Approximating the Frequenc.pdf:application/pdf}
}

@inproceedings{cohen_spectral_2003,
	address = {New York, NY, USA},
	series = {{SIGMOD} '03},
	title = {Spectral {Bloom} {Filters}},
	isbn = {978-1-58113-634-0},
	doi = {10.1145/872757.872787},
	abstract = {A Bloom Filter is a space-efficient randomized data structure allowing membership queries over sets with certain allowable errors. It is widely used in many applications which take advantage of its ability to compactly represent a set, and filter out effectively any element that does not belong to the set, with small error probability. This paper introduces the Spectral Bloom Filter (SBF), an extension of the original Bloom Filter to multi-sets, allowing the filtering of elements whose multiplicities are below a threshold given at query time. Using memory only slightly larger than that of the original Bloom Filter, the SBF supports queries on the multiplicities of individual keys with a guaranteed, small error probability. The SBF also supports insertions and deletions over the data set. We present novel methods for reducing the probability and magnitude of errors. We also present an efficient data structure and algorithms to build it incrementally and maintain it over streaming data, as well as over materialized data with arbitrary insertions and deletions. The SBF does not assume any a priori filtering threshold and effectively and efficiently maintains information over the entire data-set, allowing for ad-hoc queries with arbitrary parameters and enabling a range of new applications.},
	urldate = {2018-04-26},
	booktitle = {Proceedings of the 2003 {ACM} {SIGMOD} {International} {Conference} on {Management} of {Data}},
	publisher = {ACM},
	author = {Cohen, Saar and Matias, Yossi},
	year = {2003},
	keywords = {bloom filter},
	pages = {241--252},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\JP3T3AYM\\Cohen and Matias - 2003 - Spectral Bloom Filters.pdf:application/pdf}
}

@inproceedings{demaine_frequency_2002,
	address = {London, UK, UK},
	series = {{ESA} '02},
	title = {Frequency {Estimation} of {Internet} {Packet} {Streams} with {Limited} {Space}},
	isbn = {978-3-540-44180-9},
	abstract = {We consider a router on the Internet analyzing the statistical properties of a TCP/IP packet stream. A fundamental difficulty with measuring traffic behavior on the Internet is that there is simply too much data to be recorded for later analysis, on the order of gigabytes a second. As a result, network routers can collect only relatively few statistics about the data. The central problem addressed here is to use the limited memory of routers to determine essential features of the network traffic stream. A particularly difficult and representative subproblem is to determine the top k categories to which the most packets belong, for a desired value of k and for a given notion of categorization such as the destination IP address.We present an algorithm that deterministically finds (in particular) all categories having a frequency above 1/(m+1) using m counters, which we prove is best possible in the worst case. We also present a sampling-based algorithm for the case that packet categories follow an arbitrary distribution, but their order over time is permuted uniformly at random. Under this model, our algorithm identifies flows above a frequency threshold of roughly $1/\sqrt{nm}$ with high probability, where m is the number of counters and n is the number of packets observed. This guarantee is not far off from the ideal of identifying all flows (probability 1/n), and we prove that it is best possible up to a logarithmic factor. We show that the algorithm ranks the identified flows according to frequency within any desired constant factor of accuracy.},
	urldate = {2018-04-26},
	booktitle = {Proceedings of the 10th {Annual} {European} {Symposium} on {Algorithms}},
	publisher = {Springer-Verlag},
	author = {Demaine, Erik D. and López-Ortiz, Alejandro and Munro, J. Ian},
	year = {2002},
	pages = {348--360},
	file = {Demaine et al. - 2002 - Frequency Estimation of Internet Packet Streams wi.pdf:C\:\\Users\\zzy\\Zotero\\storage\\QL5VYIUN\\Demaine et al. - 2002 - Frequency Estimation of Internet Packet Streams wi.pdf:application/pdf}
}

@inproceedings{iyer_approach_2003,
	title = {An approach to alleviate link overload as observed on an {IP} backbone},
	volume = {1},
	doi = {10.1109/INFCOM.2003.1208692},
	abstract = {Shortest path routing protocols may suffer from congestion due to the use of a single shortest path between a source and a destination. The goal of our work is to first understand how links become overloaded in an IP backbone, and then to explore if the routing protocol---either in its existing form, or in some enhanced form---could be made to respond immediately to overload and reduce the likelihood of its occurrence. Our method is to use extensive measurements of Sprint's backbone network, measuring 138 links between September 2000 and June 2001. We find that since the backbone is designed to be overprovisioned, link overload is rare, and when it occurs, 80\% of the time it is caused due to link failures. Furthermore, we find that when a link is overloaded, few (if any) other links in the network are also overloaded. This suggests that deflecting packets to less utilized alternate paths could be an effective method for tackling overload. We analytically derive the condition that a network, which has multiple equal length shortest paths between every pair of nodes (as is common in the highly meshed backbone networks) can provide for loop-free deflection paths if all the link weights are within a ratio 1 + 1/(d-1) of each other, where d is the diameter of the network. Based on our measurements, the nature of the backbone topology and the careful use of link weights, we propose a deflection routing algorithm to tackle link overload where each node makes local decisions. Simulations suggest that this can be a simple and efficient way to overcome link overload, without requiring any changes to the routing protocol.},
	booktitle = {{IEEE} {INFOCOM} 2003. {Twenty}-second {Annual} {Joint} {Conference} of the {IEEE} {Computer} and {Communications} {Societies} ({IEEE} {Cat}. {No}.03CH37428)},
	author = {Iyer, Sundar and Bhattacharyya, Supratik and Taft, N. and Diot, C.},
	month = mar,
	year = {2003},
	keywords = {Routing protocols, Throughput, Network topology, Spine, Telecommunication traffic, transport protocols, Availability, Costs, deflection routing algorithm, IP backbone topology, link failure, link overload, link weights, Load management, loop-free deflection paths, network measurements, Resilience, routing protocols, shortest path routing protocols, Sprint backbone network, telecommunication congestion control, telecommunication links, Traffic control},
	pages = {406--416},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\3R96BS4I\\1208692.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\39CDXZZ9\\Iyer et al. - 2003 - An approach to alleviate link overload as observed.pdf:application/pdf}
}

@article{fraleigh_packet-level_2003,
	title = {Packet-level traffic measurements from the {Sprint} {IP} backbone},
	volume = {17},
	issn = {0890-8044},
	doi = {10.1109/MNET.2003.1248656},
	abstract = {Network traffic measurements provide essential data for networking research and network management. In this article we describe a passive monitoring system designed to capture GPS synchronized packet-level traffic measurements on OC-3, OC-12, and OC-48 links. Our system is deployed in four POP in the Sprint IP backbone. Measurement data is stored on a 10 Tbyte storage area network and analyzed on a computing cluster. We present a set of results to both demonstrate the strength of the system and identify recent changes in Internet traffic characteristics. The results include traffic workload, analyses of TCP flow round-trip times, out-of-sequence packet rates, and packet delay. We also show that some links no longer carry Web traffic as their dominant component to the benefit of file sharing and media streaming. On most links we monitored, TCP flows exhibit low out-of-sequence packet rates, and backbone delays are dominated by the speed of light.},
	number = {6},
	journal = {IEEE Network},
	author = {Fraleigh, C. and Moon, S. and Lyles, B. and Cotton, C. and Khan, M. and Moll, D. and Rockell, R. and Seely, T. and Diot, C.},
	month = nov,
	year = {2003},
	keywords = {Internet, Monitoring, monitoring, Spine, Telecommunication traffic, transport protocols, network management, Area measurement, backbone delays, Computer networks, Delay, delays, file sharing, Global Positioning System, GPS synchronized measurements, Internet traffic characteristics, media streaming, multimedia communication, network traffic measurements, OC-12, OC-3, OC-48 links, out-of-sequence packet rates, packet delay, packet switching, packet-level traffic measurements, passive monitoring system, POP, Sprint IP backbone, Storage area networks, Streaming media, TCP flow round-trip times, telecommunication traffic recording, traffic workload, Web traffic},
	pages = {6--16},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\RA365ARZ\\1248656.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\LRAU37BR\\Fraleigh et al. - 2003 - Packet-level traffic measurements from the Sprint .pdf:application/pdf}
}

@article{fan_summary_2000,
	title = {Summary cache: a scalable wide-area {Web} cache sharing protocol},
	volume = {8},
	issn = {1063-6692},
	shorttitle = {Summary cache},
	doi = {10.1109/90.851975},
	abstract = {The sharing of caches among Web proxies is an important technique to reduce Web traffic and alleviate network bottlenecks. Nevertheless it is not widely deployed due to the overhead of existing protocols. In this paper we demonstrate the benefits of cache sharing, measure the overhead of the existing protocols, and propose a new protocol called “summary cache”. In this new protocol, each proxy keeps a summary of the cache directory of each participating proxy, and checks these summaries for potential hits before sending any queries. Two factors contribute to our protocol's low overhead: the summaries are updated only periodically, and the directory representations are very economical, as low as 8 bits per entry. Using trace-driven simulations and a prototype implementation, we show that, compared to existing protocols such as the Internet cache protocol (ICP), summary cache reduces the number of intercache protocol messages by a factor of 25 to 60, reduces the bandwidth consumption by over 50\%, eliminates 30\% to 95\% of the protocol CPU overhead, all while maintaining almost the same cache hit ratios as ICP. Hence summary cache scales to a large number of proxies. (This paper is a revision of Fan et al. 1998; we add more data and analysis in this version.)},
	number = {3},
	journal = {IEEE/ACM Transactions on Networking},
	author = {Fan, Li and Cao, Pei and Almeida, J. and Broder, A. Z.},
	month = jun,
	year = {2000},
	keywords = {Bandwidth, protocols, Routing protocols, Internet, Particle measurements, Telecommunication traffic, Computer science, bloom filter, bandwidth consumption, Bit rate, cache hit ratios, Cache memory, cache storage, Data analysis, directory representations, intercache protocol messages, overhead, prototype implementation, queries, scalable wide-area Web cache sharing protocol, summary cache, trace-driven simulations, Virtual prototyping, Web proxies},
	pages = {281--293},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\E3YYSYF8\\851975.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\A4BKZY58\\Fan et al. - 2000 - Summary cache a scalable wide-area Web cache shar.pdf:application/pdf}
}

@book{chong_introduction_2013,
  author    = {Chong, Edwin Kah Pin and Żak, Stanislaw H.},
  title     = {An introduction to optimization},
  edition   = {Fourth},
  series    = {Wiley series in discrete mathematics and optimization},
  publisher = {Wiley},
  address   = {Hoboken, New Jersey},
  year      = {2013},
  isbn      = {978-1-118-27901-4},
  abstract  = {"The purpose of the book is to give the reader a working knowledge of optimization theory and methods"--},
  keywords  = {optimization, Discrete Mathematics},
  file      = {Chong and Żak - 2001 - An introduction to optimization.pdf:C\:\\Users\\zzy\\Zotero\\storage\\FU5NEVD3\\Chong and Żak - 2001 - An introduction to optimization.pdf:application/pdf},
}

@article{racz_basic_2016,
  author     = {Rácz, Miklós Z. and Bubeck, Sébastien},
  title      = {Basic models and questions in statistical network analysis},
  journal    = {arXiv preprint},
  eprint     = {1609.03854},
  eprinttype = {arXiv},
  month      = sep,
  year       = {2016},
  url        = {https://arxiv.org/abs/1609.03854},
  urldate    = {2018-04-27},
  language   = {en},
  keywords   = {probability theory},
  file       = {Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\N8YNZ3VR\\Racz and Bubeck - 2016 - Basic models and questions in statistical network .pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\ACZLXDDP\\1609.html:text/html},
}

@misc{noauthor_introduction_2017,
  title   = {Introduction to {GnuPlot}},
  year    = {2017},
  url     = {http://internal.physics.uwa.edu.au/~wang/Gnu/GnuplotTutorial.pdf},
  urldate = {2018-04-29},
  file    = {GnuplotTutorial.pdf:C\:\\Users\\zzy\\Zotero\\storage\\NZSCBMF6\\GnuplotTutorial.pdf:application/pdf},
}

@book{gray_entropy_1990,
  author    = {Gray, Robert M.},
  title     = {Entropy and information theory},
  publisher = {Springer-Verlag},
  address   = {New York},
  year      = {1990},
  isbn      = {978-0-387-97371-5, 978-3-540-97371-3},
  keywords  = {Coding theory, entropy of information theory},
  file      = {Gray - 1990 - Entropy and information theory.pdf:C\:\\Users\\zzy\\Zotero\\storage\\V2T3SIVB\\Gray - 1990 - Entropy and information theory.pdf:application/pdf},
}

@article{jaynes_information_1957,
  author   = {Jaynes, E. T.},
  title    = {Information {Theory} and {Statistical} {Mechanics}},
  journal  = {Phys. Rev.},
  volume   = {106},
  number   = {4},
  pages    = {620--630},
  month    = may,
  year     = {1957},
  doi      = {10.1103/PhysRev.106.620},
  urldate  = {2018-04-30},
  abstract = {Information theory provides a constructive criterion for setting up probability distributions on the basis of partial knowledge, and leads to a type of statistical inference which is called the maximum-entropy estimate. It is the least biased estimate possible on the given information; i.e., it is maximally noncommittal with regard to missing information. If one considers statistical mechanics as a form of statistical inference rather than as a physical theory, it is found that the usual computational rules, starting with the determination of the partition function, are an immediate consequence of the maximum-entropy principle. In the resulting "subjective statistical mechanics," the usual rules are thus justified independently of any physical argument, and in particular independently of experimental verification; whether or not the results agree with experiment, they still represent the best estimates that could have been made on the basis of the information available.},
  file     = {APS Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\H8GVSIW2\\PhysRev.106.html:text/html;theory.1.pdf:C\:\\Users\\zzy\\Zotero\\storage\\QKD77Z4Y\\theory.1.pdf:application/pdf},
}

@article{shannon_mathematical_2001,
  author  = {Shannon, C. E.},
  title   = {A {Mathematical} {Theory} of {Communication}},
  journal = {SIGMOBILE Mob. Comput. Commun. Rev.},
  volume  = {5},
  number  = {1},
  pages   = {3--55},
  month   = jan,
  year    = {2001},
  issn    = {1559-1662},
  doi     = {10.1145/584091.584093},
  urldate = {2018-04-30},
  file    = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\WUQ9EGRK\\Shannon - 2001 - A Mathematical Theory of Communication.pdf:application/pdf},
}

@article{shannon_mathematical_1948,
  author   = {Shannon, C. E.},
  title    = {A mathematical theory of communication},
  journal  = {The Bell System Technical Journal},
  volume   = {27},
  number   = {3},
  pages    = {379--423},
  month    = jul,
  year     = {1948},
  issn     = {0005-8580},
  doi      = {10.1002/j.1538-7305.1948.tb01338.x},
  abstract = {The recent development of various methods of modulation such as PCM and PPM which exchange bandwidth for signal-to-noise ratio has intensified the interest in a general theory of communication. A basis for such a theory is contained in the important papers of Nyquist1 and Hartley2 on this subject. In the present paper we will extend the theory to include a number of new factors, in particular the effect of noise in the channel, and the savings possible due to the statistical structure of the original message and due to the nature of the final destination of the information.},
  file     = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\XHC33J5W\\6773024.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\R7BMURRR\\Shannon - 1948 - A mathematical theory of communication.pdf:application/pdf},
}

@inproceedings{gu_detecting_2005,
  author    = {Gu, Yu and McCallum, Andrew and Towsley, Don},
  title     = {Detecting {Anomalies} in {Network} {Traffic} {Using} {Maximum} {Entropy} {Estimation}},
  booktitle = {Proceedings of the 5th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
  series    = {{IMC} '05},
  publisher = {USENIX Association},
  address   = {Berkeley, CA, USA},
  year      = {2005},
  pages     = {32},
  urldate   = {2018-05-01},
  abstract  = {We develop a behavior-based anomaly detection method that detects network anomalies by comparing the current network traffic against a baseline distribution. The Maximum Entropy technique provides a flexible and fast approach to estimate the baseline distribution, which also gives the network administrator a multi-dimensional view of the network traffic. By computing a measure related to the relative entropy of the network traffic under observation with respect to the baseline distribution, we are able to distinguish anomalies that change the traffic either abruptly or slowly. In addition, our method provides information revealing the type of the anomaly detected. It requires a constant memory and a computation time proportional to the traffic rate.},
  keywords  = {entropy of information theory},
  file      = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\M6YWSTPZ\\Gu et al. - 2005 - Detecting Anomalies in Network Traffic Using Maxim.pdf:application/pdf},
}

@article{tarkoma_theory_2012,
  author   = {Tarkoma, Sasu and Rothenberg, Christian Esteve and Lagerspetz, Eemil},
  title    = {Theory and {Practice} of {Bloom} {Filters} for {Distributed} {Systems}},
  journal  = {IEEE Communications Surveys \& Tutorials},
  volume   = {14},
  number   = {1},
  pages    = {131--155},
  year     = {2012},
  issn     = {1553-877X},
  doi      = {10.1109/SURV.2011.031611.00024},
  abstract = {Many network solutions and overlay networks utilize probabilistic techniques to reduce information processing and networking costs. This survey article presents a number of frequently used and useful probabilistic techniques. Bloom filters and their variants are of prime importance, and they are heavily used in various distributed systems. This has been reflected in recent research and many new algorithms have been proposed for distributed systems that are either directly or indirectly based on Bloom filters. In this survey, we give an overview of the basic and advanced techniques, reviewing over 20 variants and discussing their application in distributed systems, in particular for caching, peer-to-peer systems, routing and forwarding, and measurement data summarization.},
  keywords = {telecommunication network routing, peer-to-peer computing, routing, Arrays, bloom filter, Bismuth, distributed systems, Filtering theory, filters, Fingerprint recognition, forwarding, information processing, measurement data summarization, networking costs, overlay networks, Peer to peer computing, peer-to-peer systems, Probabilistic logic, probabilistic structures, probabilistic techniques, probability},
  file     = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\HJLZS7AK\\Tarkoma et al. - 2012 - Theory and practice of bloom filters for distribut.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\43EAIHH4\\5751342.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\Q74NHYQX\\Tarkoma et al. - 2012 - Theory and Practice of Bloom Filters for Distribut.pdf:application/pdf},
}

@inproceedings{dharmapurikar_longest_2003,
  author    = {Dharmapurikar, Sarang and Krishnamurthy, Praveen and Taylor, David E.},
  title     = {Longest {Prefix} {Matching} {Using} {Bloom} {Filters}},
  booktitle = {Proceedings of the 2003 {Conference} on {Applications}, {Technologies}, {Architectures}, and {Protocols} for {Computer} {Communications}},
  series    = {{SIGCOMM} '03},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2003},
  pages     = {201--212},
  isbn      = {978-1-58113-735-4},
  doi       = {10.1145/863955.863979},
  urldate   = {2018-05-02},
  abstract  = {We introduce the first algorithm that we are aware of to employ Bloom filters for Longest Prefix Matching (LPM). The algorithm performs parallel queries on Bloom filters, an efficient data structure for membership queries, in order to determine address prefix membership in sets of prefixes sorted by prefix length. We show that use of this algorithm for Internet Protocol (IP) routing lookups results in a search engine providing better performance and scalability than TCAM-based approaches. The key feature of our technique is that the performance, as determined by the number of dependent memory accesses per lookup, can be held constant for longer address lengths or additional unique address prefix lengths in the forwarding table given that memory resources scale linearly with the number of prefixes in the forwarding table.Our approach is equally attractive for Internet Protocol Version 6 (IPv6) which uses 128-bit destination addresses, four times longer than IPv4. We present a basic version of our approach along with optimizations leveraging previous advances in LPM algorithms. We also report results of performance simulations of our system using snapshots of IPv4 BGP tables and extend the results to IPv6. Using less than 2Mb of embedded RAM and a commodity SRAM device, our technique achieves average performance of one hash probe per lookup and a worst case of two hash probes and one array access per lookup.},
  keywords  = {bloom filter, forwarding, IP lookup, longest prefix matching},
  file      = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\XAJNZV8L\\Dharmapurikar et al. - 2003 - Longest Prefix Matching Using Bloom Filters.pdf:application/pdf},
}

@inproceedings{song_fast_2005,
  author     = {Song, Haoyu and Dharmapurikar, Sarang and Turner, Jonathan and Lockwood, John},
  title      = {Fast {Hash} {Table} {Lookup} {Using} {Extended} {Bloom} {Filter}: {An} {Aid} to {Network} {Processing}},
  shorttitle = {Fast {Hash} {Table} {Lookup} {Using} {Extended} {Bloom} {Filter}},
  booktitle  = {Proceedings of the 2005 {Conference} on {Applications}, {Technologies}, {Architectures}, and {Protocols} for {Computer} {Communications}},
  series     = {{SIGCOMM} '05},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  year       = {2005},
  pages      = {181--192},
  isbn       = {978-1-59593-009-5},
  doi        = {10.1145/1080091.1080114},
  urldate    = {2018-05-02},
  abstract   = {Hash tables are fundamental components of several network processing algorithms and applications, including route lookup, packet classification, per-flow state management and network monitoring. These applications, which typically occur in the data-path of high-speed routers, must process and forward packets with little or no buffer, making it important to maintain wire-speed throughout. A poorly designed hash table can critically affect the worst-case throughput of an application, since the number of memory accesses required for each lookup can vary. Hence, high throughput applications require hash tables with more predictable worst-case lookup performance. While published papers often assume that hash table lookups take constant time, there is significant variation in the number of items that must be accessed in a typical hash table search, leading to search times that vary by a factor of four or more.We present a novel hash table data structure and lookup algorithm which improves the performance over a naive hash table by reducing the number of memory accesses needed for the most time-consuming lookups. This allows designers to achieve higher lookup performance for a given memory bandwidth, without requiring large amounts of buffering in front of the lookup engine. Our algorithm extends the multiple-hashing Bloom Filter data structure to support exact matches and exploits recent advances in embedded memory technology. Through a combination of analysis and simulations we show that our algorithm is significantly faster than a naive hash table using the same amount of memory, hence it can support better throughput for router applications that use hash tables.},
  keywords   = {bloom filter, forwarding, hash table},
  file       = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\U3KRULQI\\Song et al. - 2005 - Fast Hash Table Lookup Using Extended Bloom Filter.pdf:application/pdf},
}

@inproceedings{song_ipv6_2009,
  author    = {Song, H. and Hao, F. and Kodialam, M. and Lakshman, T. V.},
  title     = {{IPv6} {Lookups} using {Distributed} and {Load} {Balanced} {Bloom} {Filters} for {100Gbps} {Core} {Router} {Line} {Cards}},
  booktitle = {{IEEE} {INFOCOM} 2009},
  month     = apr,
  year      = {2009},
  pages     = {2518--2526},
  doi       = {10.1109/INFCOM.2009.5062180},
  abstract  = {Internet line speeds are expected to reach 100 Gbps in a few years. To match these line rates, a single router line card needs to forward more than 150 million packets per second. This requires a corresponding amount of longest prefix match operations. Furthermore, the increased use of IPv6 requires core routers to perform the longest prefix match on several hundred thousand prefixes varying in length up to 64 bits. It is a challenge to scale existing algorithms simultaneously in the three dimensions of increased throughput, table size and prefix length. Recently, Bloom filter-based IP lookup algorithms have been proposed. While these algorithms can take advantage of hardware parallelism and fast on-chip memory to achieve high performance, they have significant drawbacks (discussed in the paper) that impede their use in practice. In this paper, we present the distributed and load balanced bloom filters to address these drawbacks. We develop the practical IP lookup algorithm for use in 100 Gbps line cards. The regular and modular hardware architecture of our scheme directly maps to the state-of-art ASICs and FPGAs with reasonable resource consumption. Also, our scheme outperforms TCAMs on most metrics including cost, power dissipation, and board footprint.},
  keywords  = {Hardware, Throughput, Internet, IP networks, telecommunication network routing, resource allocation, transport protocols, Inspection, Communications Society, Filters, Random access memory, bloom filter, bit rate 100 Gbit/s, board footprint, core router line card, distributed-and-load balanced bloom filter, Energy consumption, field programmable gate arrays, FPGA, hardware parallel architecture, IPv6 lookup, on-chip memory, parallel architectures, power dissipation, Power dissipation, resource consumption, table lookup, Transceivers, USA Councils},
  file      = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\SW3Z48GV\\5062180.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\7N4LTWJA\\Song et al. - 2009 - IPv6 Lookups using Distributed and Load Balanced B.pdf:application/pdf},
}

@article{broder_network_2004,
  author     = {Broder, Andrei and Mitzenmacher, Michael},
  title      = {Network applications of {Bloom} filters: {A} survey},
  shorttitle = {Network applications of {Bloom} filters},
  journal    = {Internet Mathematics},
  volume     = {1},
  number     = {4},
  pages      = {485--509},
  year       = {2004},
  keywords   = {bloom filter},
  file       = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\IU9A3GTL\\Broder and Mitzenmacher - 2004 - Network applications of bloom filters A survey.pdf:application/pdf},
}

@inproceedings{yuan_counting_2008,
  author    = {Yuan, Z. and Miao, J. and Jia, Y. and Wang, L.},
  title     = {Counting {Data} {Stream} {Based} on {Improved} {Counting} {Bloom} {Filter}},
  booktitle = {2008 {The} {Ninth} {International} {Conference} on {Web}-{Age} {Information} {Management}},
  month     = jul,
  year      = {2008},
  pages     = {512--519},
  doi       = {10.1109/WAIM.2008.45},
  abstract  = {Burst detection is an inherent problem for data streams, so it has attracted extensive attention in research community due to its broad applications. One of the basic problems in burst detection is how to count frequencies of all elements in data stream. This paper presents a novel solution based on Improved Counting Bloom Filter, which is also called BCBF+HSet. Comparing with intuitionistic approach such as array and list, our solution significantly reduces space complexity though it introduces few error rates. Further, we discuss space/time complexity and error rate of our solution, and compare it with two classic Counting Bloom Filters, CBF and DCF. Theoretical analysis and simulation results demonstrate the efficiency of the proposed solution.},
  keywords  = {Monitoring, computational complexity, Counting circuits, Application software, data structure, data structures, Information filtering, bloom filter, BCBF+HSet, burst, burst detection, Computer crime, data stream, database management systems, element frequency, Error analysis, error statistics, Frequency, improved counting bloom filter, Information filters, Information management, space complexity, Statistical distributions, time complexity},
  file      = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\GINRJA5P\\4597059.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\Z55C74KC\\Yuan et al. - 2008 - Counting Data Stream Based on Improved Counting Bl.pdf:application/pdf},
}

@article{chang_bigtable:_2008,
  author     = {Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C. and Wallach, Deborah A. and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E.},
  title      = {Bigtable: {A} {Distributed} {Storage} {System} for {Structured} {Data}},
  shorttitle = {Bigtable},
  journal    = {ACM Trans. Comput. Syst.},
  volume     = {26},
  number     = {2},
  pages      = {4:1--4:26},
  month      = jun,
  year       = {2008},
  issn       = {0734-2071},
  doi        = {10.1145/1365815.1365816},
  urldate    = {2018-05-02},
  abstract   = {Bigtable is a distributed storage system for managing structured data that is designed to scale to a very large size: petabytes of data across thousands of commodity servers. Many projects at Google store data in Bigtable, including web indexing, Google Earth, and Google Finance. These applications place very different demands on Bigtable, both in terms of data size (from URLs to web pages to satellite imagery) and latency requirements (from backend bulk processing to real-time data serving). Despite these varied demands, Bigtable has successfully provided a flexible, high-performance solution for all of these Google products. In this article, we describe the simple data model provided by Bigtable, which gives clients dynamic control over data layout and format, and we describe the design and implementation of Bigtable.},
  keywords   = {Distributed, Large-Scale, Storage},
  file       = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\CWR8Z7UG\\Chang et al. - 2008 - Bigtable A Distributed Storage System for Structu.pdf:application/pdf},
}

@article{dietzfelbinger_dynamic_1994,
  author     = {Dietzfelbinger, Martin and Karlin, Anna and Mehlhorn, Kurt and {Meyer auf der Heide}, Friedhelm and Rohnert, Hans and Tarjan, Robert E.},
  title      = {Dynamic perfect hashing: {Upper} and lower bounds},
  shorttitle = {Dynamic perfect hashing},
  journal    = {SIAM Journal on Computing},
  volume     = {23},
  number     = {4},
  pages      = {738--761},
  year       = {1994},
  file       = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\24GLDGTL\\Dietzfelbinger et al. - 1994 - Dynamic perfect hashing Upper and lower bounds.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\WB23HRUN\\S0097539791194094.html:text/html},
}

@inproceedings{hao_building_2007,
  author    = {Hao, Fang and Kodialam, Murali and Lakshman, T. V.},
  title     = {Building {High} {Accuracy} {Bloom} {Filters} {Using} {Partitioned} {Hashing}},
  booktitle = {Proceedings of the 2007 {ACM} {SIGMETRICS} {International} {Conference} on {Measurement} and {Modeling} of {Computer} {Systems}},
  series    = {{SIGMETRICS} '07},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2007},
  pages     = {277--288},
  isbn      = {978-1-59593-639-4},
  doi       = {10.1145/1254882.1254916},
  urldate   = {2018-05-02},
  abstract  = {The growing importance of operations such as packet-content inspection, packet classification based on non-IP headers, maintaining flow-state, etc. has led to increased interest in the networking applications of Bloom filters. This is because Bloom filters provide a relatively easy method for hardware implementation of set-membership queries. However, the tradeoff is that Bloom filters only provide a probabilistic test and membership queries can result in false positives. Ideally, we would like this false positive probability to be very low. The main contribution of this paper is a method for significantly reducing this false positive probability in comparison to existing schemes. This is done by developing a partitioned hashing method which results in a choice of hash functions that set far fewer bits in the Bloom filter bit vector than would be the case otherwise. This lower fill factor of the bit vector translates to a much lower false positive probability. We show experimentally that this improved choice can result in as much as a ten-fold increase in accuracy over standard Bloom filters. We also show that the scheme performs much better than other proposed schemes for improving Bloom filters.},
  keywords  = {bloom filter, hashing},
  file      = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\A7XXWS9M\\Hao et al. - 2007 - Building High Accuracy Bloom Filters Using Partiti.pdf:application/pdf},
}

@inproceedings{hao_fast_2009,
  author    = {Hao, F. and Kodialam, M. and Lakshman, T. V. and Song, H.},
  title     = {Fast {Multiset} {Membership} {Testing} {Using} {Combinatorial} {Bloom} {Filters}},
  booktitle = {{IEEE} {INFOCOM} 2009},
  month     = apr,
  year      = {2009},
  pages     = {513--521},
  doi       = {10.1109/INFCOM.2009.5061957},
  abstract  = {In this paper we consider the problem of designing a data structure that can perform fast multiset membership testing in deterministic time. Our primary goal is to develop a hardware implementation of the data structure which uses only embedded memory blocks. Prior efforts to solve this problem involve hashing into multiple bloom filters. Such approach needs a priori knowledge of the number of elements in each set in order to size the bloom filter. We use a single bloom filter based approach and use multiple sets of hash functions to code for the set (group) id. Since a single bloom filter is used, it does not need a priori knowledge of the distribution of the elements across the different sets. We show how to improve the performance of the data structure by using constant weight error correcting codes for coding the group id. Using error correcting codes improves the performance of these data structures especially when there are large number of sets. We also outline an efficient hardware based approach to generate the the large number of hash functions that we need for this data structure. The resulting data structure, COMB, is amenable to a variety of time-critical network applications.},
  keywords  = {Hardware, Throughput, data structure, data structures, Data structures, Filters, bloom filter, Energy consumption, combinatorial bloom filters, constant weight error correcting codes, cryptography, efficient hardware based approach, error correction codes, Error correction codes, fast dynamic multiset membership testing, hash functions, information filtering, Pattern matching, Switches, Table lookup, Testing, time-critical network applications},
  file      = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\DCRPX5YE\\5061957.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\PR64BQ2T\\Hao et al. - 2009 - Fast Multiset Membership Testing Using Combinatori.pdf:application/pdf},
}

@article{guo_dynamic_2010,
  author   = {Guo, D. and Wu, J. and Chen, H. and Yuan, Y. and Luo, X.},
  title    = {The {Dynamic} {Bloom} {Filters}},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  volume   = {22},
  number   = {1},
  pages    = {120--133},
  month    = jan,
  year     = {2010},
  issn     = {1041-4347},
  doi      = {10.1109/TKDE.2009.57},
  abstract = {A Bloom filter is an effective, space-efficient data structure for concisely representing a set, and supporting approximate membership queries. Traditionally, the Bloom filter and its variants just focus on how to represent a static set and decrease the false positive probability to a sufficiently low level. By investigating mainstream applications based on the Bloom filter, we reveal that dynamic data sets are more common and important than static sets. However, existing variants of the Bloom filter cannot support dynamic data sets well. To address this issue, we propose dynamic Bloom filters to represent dynamic sets, as well as static sets and design necessary item insertion, membership query, item deletion, and filter union algorithms. The dynamic Bloom filter can control the false positive probability at a low level by expanding its capacity as the set cardinality increases. Through comprehensive mathematical analysis, we show that the dynamic Bloom filter uses less expected memory than the Bloom filter when representing dynamic sets with an upper bound on set cardinality, and also that the dynamic Bloom filter is more stable than the Bloom filter due to infrequent reconstruction when addressing dynamic sets without an upper bound on set cardinality. Moreover, the analysis results hold in stand-alone applications, as well as distributed applications.},
  keywords = {data structure, data structures, mathematical analysis, bloom filter, probability, dynamic Bloom filters, information representation, dynamic Bloom filter, false positive probability, filter union algorithm, item deletion, membership query, query processing},
  file     = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\5J49EEYS\\4796196.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\IGZK3U5I\\Guo et al. - 2010 - The Dynamic Bloom Filters.pdf:application/pdf},
}

@inproceedings{rottenstreich_variable-increment_2012,
  author    = {Rottenstreich, O. and Kanizo, Y. and Keslassy, I.},
  title     = {The {Variable}-{Increment} {Counting} {Bloom} {Filter}},
  booktitle = {2012 {Proceedings} {IEEE} {INFOCOM}},
  month     = mar,
  year      = {2012},
  pages     = {1880--1888},
  doi       = {10.1109/INFCOM.2012.6195563},
  abstract  = {Counting Bloom Filters (CBFs) are widely used in networking device algorithms. They implement fast set representations to support membership queries with limited error, and support element deletions unlike Bloom Filters. However, they consume significant amounts of memory. In this paper we introduce a new general method based on variable increments to improve the efficiency of CBFs and their variants. Unlike CBFs, at each element insertion, the hashed counters are incremented by a hashed variable increment instead of a unit increment. Then, to query an element, the exact value of a counter is considered and not just its positiveness. We present two simple schemes based on this method. We demonstrate that this method can always achieve a lower false positive rate and a lower overflow probability bound than CBF in practical systems. We also show how it can be easily implemented in hardware, with limited added complexity and memory overhead. We further explain how this method can extend many variants of CBF that have been published in the literature. Last, using simulations, we show how it can improve the false positive rate of CBFs by up to an order of magnitude given the same amount of memory.},
  keywords  = {Hardware, Radiation detectors, file organisation, Arrays, Memory management, bloom filter, Complexity theory, counting circuits, digital filters, element deletions, element insertion, Encoding, fast set representations, Filtering algorithms, hashed counters, hashed variable, membership queries, memory overhead, networking device algorithms, overflow probability bound, variable-increment counting bloom filter},
  file      = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\E6SFQAJL\\6195563.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\SJ7TM5DZ\\Rottenstreich et al. - 2012 - The Variable-Increment Counting Bloom Filter.pdf:application/pdf},
}

@article{moreira_capacity_2012,
  author   = {Moreira, M. D. D. and Laufer, R. P. and Velloso, P. B. and Duarte, O. C. M. B.},
  title    = {Capacity and {Robustness} {Tradeoffs} in {Bloom} {Filters} for {Distributed} {Applications}},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  volume   = {23},
  number   = {12},
  pages    = {2219--2230},
  month    = dec,
  year     = {2012},
  issn     = {1045-9219},
  doi      = {10.1109/TPDS.2012.87},
  abstract = {The Bloom filter is a space-efficient data structure often employed in distributed applications to save bandwidth during data exchange. These savings, however, come at the cost of errors in the shared data, which are usually assumed low enough to not disrupt the application. We argue that this assumption does not hold in a more hostile environment, such as the Internet, where attackers can send a carefully crafted Bloom filter in order to break the application. In this paper, we propose the concatenated Bloom filter (CBF), a robust Bloom filter that prevents the attacker from interfering on the shared information, protecting the application data while still providing space efficiency. Instead of using a single large filter, the CBF concatenates small subfilters to improve both the filter robustness and capacity. We propose three CBF variants and provide analytical results that show the efficacy of the CBF for different scenarios. We also evaluate the performance of our filter in an IP traceback application and simulation results confirm the effectiveness of the proposed mechanism in the face of attackers.},
  keywords = {Internet, Radiation detectors, Robustness, data structures, Filters, bloom filter, Error analysis, application data protection, bandwidth saving, CBF, data exchange, distributed applications, distributed processing, Distributed processing, filter capacity improvement, filter robustness improvement, IP traceback, IP traceback application, Network security, Probability, robust concatenated Bloom filter, security, security of data, Servers, space-efficient data structure},
  file     = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\45BH6ZJ6\\6171165.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\GMSC8DHY\\Moreira et al. - 2012 - Capacity and Robustness Tradeoffs in Bloom Filters.pdf:application/pdf},
}

@inproceedings{bonomi_beyond_2006,
	author = {Bonomi, Flavio and Mitzenmacher, Michael and Panigrah, Rina and Singh, Sushil and Varghese, George},
	title = {Beyond {Bloom} {Filters}: {From} {Approximate} {Membership} {Checks} to {Approximate} {State} {Machines}},
	shorttitle = {Beyond {Bloom} {Filters}},
	booktitle = {Proceedings of the 2006 {Conference} on {Applications}, {Technologies}, {Architectures}, and {Protocols} for {Computer} {Communications}},
	series = {{SIGCOMM} '06},
	publisher = {ACM},
	address = {New York, NY, USA},
	year = {2006},
	pages = {315--326},
	isbn = {978-1-59593-308-9},
	doi = {10.1145/1159913.1159950},
	keywords = {bloom filter, network flows, state machines},
	abstract = {Many networking applications require fast state lookups in a concurrent state machine,which tracks the state of a large number of flows simultaneously.We consider the question of how to compactly represent such concurrent state machines. To achieve compactness,we consider data structures for Approximate Concurrent State Machines (ACSMs)that can return false positives,false negatives,or a "don 't know "response.We describe three techniques based on Bloom filters and hashing,and evaluate them using both theoretical analysis and simulation.Our analysis leads us to an extremely efficient hashing-based scheme with several parameters that can be chosen to trade off space,computation,and the pact of errors.Our hashing approach also yields a simple alternative structure with the same functionality as a counting Bloom filter that uses much less space.We show how ACSMs can be used for video congestion control.Using an ACSM,a router can implement sophisticated Active Queue Management (AQM)techniques for video traffic (without the need for standards changes to mark packets or change video formats),with a factor of four reduction in memory compared to full-state schemes and with very little error.We also show that ACSMs show promise for real-time detection of P2P traffic.},
	urldate = {2018-05-02},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\IGD2I9HR\\Bonomi et al. - 2006 - Beyond Bloom Filters From Approximate Membership .pdf:application/pdf},
}

@inproceedings{pagh_optimal_2005,
	author = {Pagh, Anna and Pagh, Rasmus and Rao, S. Srinivasa},
	title = {An {Optimal} {Bloom} {Filter} {Replacement}},
	booktitle = {Proceedings of the {Sixteenth} {Annual} {ACM}-{SIAM} {Symposium} on {Discrete} {Algorithms}},
	series = {{SODA} '05},
	publisher = {Society for Industrial and Applied Mathematics},
	address = {Philadelphia, PA, USA},
	year = {2005},
	pages = {823--829},
	isbn = {978-0-89871-585-9},
	keywords = {bloom filter},
	abstract = {This paper considers space-efficient data structures for storing an approximation S' to a set S such that S ⊆ S' and any element not in S belongs to S' with probability at most ∈. The Bloom filter data structure, solving this problem, has found widespread use. Our main result is a new RAM data structure that improves Bloom filters in several ways:• The time for looking up an element in S' is O(1), independent of ∈.• The space usage is within a lower order term of the lower bound.• The data structure uses explicit hash function families.• The data structure supports insertions and deletions on S in amortized expected constant time.The main technical ingredient is a succinct representation of dynamic multisets. We also consider three recent generalizations of Bloom filters.},
	urldate = {2018-05-02},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\3Z7XEAUL\\Pagh et al. - 2005 - An Optimal Bloom Filter Replacement.pdf:application/pdf},
}

@inproceedings{zhu_hierarchical_2004,
	author = {Zhu, Yifeng and Jiang, Hong and Wang, J.},
	title = {Hierarchical {Bloom} filter arrays ({HBA}): a novel, scalable metadata management system for large cluster-based storage},
	shorttitle = {Hierarchical {Bloom} filter arrays ({HBA})},
	booktitle = {2004 {IEEE} {International} {Conference} on {Cluster} {Computing} ({IEEE} {Cat}. {No}.04EX935)},
	month = sep,
	year = {2004},
	pages = {165--174},
	doi = {10.1109/CLUSTR.2004.1392614},
	keywords = {Bandwidth, Engineering management, Computer science, file organisation, High performance computing, Information filtering, bloom filter, Computer networks, trace-driven simulations, Information filters, distributed processing, arrays, digital storage, file access patterns, file lookup, file mapping, File servers, hierarchical Bloom filter arrays, Image storage, large cluster-based storage, memory architecture, meta data, metadata management, metadata server, partial distribution information caching, probabilistic arrays, Scalability, superclusters, temporal locality, workstation clusters},
	abstract = {An efficient and distributed scheme for file mapping or file lookup scheme is critical in decentralizing metadata management within a group of metadata servers. This work presents a technique called HBA (hierarchical Bloom filter arrays) to map file names to the servers holding their metadata. Two levels of probabilistic arrays, i.e., Bloom filter arrays, with different accuracies are used on each metadata server. One array, with lower accuracy and representing the distribution of the entire metadata, trades accuracy for significantly reduced memory overhead, while the other array, with higher accuracy, caches partial distribution information and exploits the temporal locality of file access patterns. Extensive trace-driven simulations have shown our HBA design to be highly effective and efficient in improving performance and scalability of file systems in clusters with 1,000 to 10,000 nodes (or superclusters).},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\4JLCQS5N\\1392614.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\GVWGZHCT\\Zhu et al. - 2004 - Hierarchical Bloom filter arrays (HBA) a novel, s.pdf:application/pdf},
}

@inproceedings{debnath_bloomflash:_2011,
	author = {Debnath, B. and Sengupta, S. and Li, J. and Lilja, D. J. and Du, D. H. C.},
	title = {{BloomFlash}: {Bloom} {Filter} on {Flash}-{Based} {Storage}},
	shorttitle = {{BloomFlash}},
	booktitle = {2011 31st {International} {Conference} on {Distributed} {Computing} {Systems}},
	month = jun,
	year = {2011},
	pages = {635--644},
	doi = {10.1109/ICDCS.2011.44},
	keywords = {Performance evaluation, data structures, Data structures, Random access memory, bloom filter, Servers, Ash, BloomFlash, Buffer storage, buffering bit updates, Data Center Applications, flash memories, Flash Memory, flash memory based storage, flash-based storage, Games, hierarchical bloom filter design, Hierarchical Design, Internet scale, memory data structure, probabilistic data structure, RAM space, RAM space usage reduction, random-access storage, Solid State Disk (SSD), storage management},
	abstract = {The bloom filter is a probabilistic data structure that provides a compact representation of a set of elements. To keep false positive probabilities low, the size of the bloom filter must be dimensioned a priori to be linear in the maximum number of keys inserted, with the linearity constant ranging typically from one to few bytes. A bloom filter is most commonly used as an in memory data structure, hence its size is limited by the availability of RAM space on the machine. As datasets have grown over time to Internet scale, so have the RAM space requirements of bloom filters. If sufficient RAM space is not available, we advocate that flash memory may serve as a suitable medium for storing bloom filters, since it is about one-tenth the cost of RAM per GB while still providing access times orders of magnitude faster than hard disk. We present BLOOMFLASH, a bloom filter designed for flash memory based storage, that provides a new dimension of trade off with bloom filter access times to reduce RAM space usage (and hence system cost). The simple design of a single flat bloom filter on flash suffers from many performance bottlenecks, including in-place bit updates that are inefficient on flash and multiple reads and random writes spread out across many flash pages for a single lookup or insert operation. To mitigate these performance bottlenecks, BLOOMFLASH leverages two key design innovations: (i) buffering bit updates in RAM and applying them in bulk to flash that helps to reduce random writes to flash, and (ii) a hierarchical bloom filter design consisting of component bloom filters, stored one per flash page, that helps to localize reads and writes on flash. We use two real-world data traces taken from representative bloom filter applications to drive and evaluate our design. BLOOMFLASH achieves bloom filter access times in the range of few tens of microseconds, thus allowing up to order of tens of thousands operations per sec.},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\Y89QIAW5\\5961740.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\Z55Y49X8\\Debnath et al. - 2011 - BloomFlash Bloom Filter on Flash-Based Storage.pdf:application/pdf},
}

@inproceedings{kirsch_less_2006,
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {Less {Hashing}, {Same} {Performance}: {Building} a {Better} {Bloom} {Filter}},
	isbn = {978-3-540-38875-3 978-3-540-38876-0},
	shorttitle = {Less {Hashing}, {Same} {Performance}},
	doi = {10.1007/11841036_42},
	abstract = {A standard technique from the hashing literature is to use two hash functions h 1(x) and h 2(x) to simulate additional hash functions of the form g i (x) = h 1(x) + ih 2(x). We demonstrate that this technique can be usefully applied to Bloom filters and related data structures. Specifically, only two hash functions are necessary to effectively implement a Bloom filter without any loss in the asymptotic false positive probability. This leads to less computation and potentially less need for randomness in practice.},
	language = {en},
	urldate = {2018-05-02},
	booktitle = {Algorithms -- {ESA} 2006},
	publisher = {Springer},
	address = {Berlin, Heidelberg},
	author = {Kirsch, Adam and Mitzenmacher, Michael},
	month = sep,
	year = {2006},
	keywords = {bloom filter},
	pages = {456--467},
	file = {Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\GFBA6VIG\\Kirsch and Mitzenmacher - 2006 - Less Hashing, Same Performance Building a Better .pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\6BSGHIHQ\\11841036_42.html:text/html}
}

@inproceedings{chazelle_bloomier_2004,
	author = {Chazelle, Bernard and Kilian, Joe and Rubinfeld, Ronitt and Tal, Ayellet},
	title = {The {Bloomier} {Filter}: {An} {Efficient} {Data} {Structure} for {Static} {Support} {Lookup} {Tables}},
	shorttitle = {The {Bloomier} {Filter}},
	booktitle = {Proceedings of the {Fifteenth} {Annual} {ACM}-{SIAM} {Symposium} on {Discrete} {Algorithms}},
	series = {{SODA} '04},
	publisher = {Society for Industrial and Applied Mathematics},
	address = {Philadelphia, PA, USA},
	year = {2004},
	pages = {30--39},
	isbn = {978-0-89871-558-3},
	keywords = {bloom filter},
	abstract = {We introduce the Bloomier filter, a data structure for compactly encoding a function with static support in order to support approximate evaluation queries. Our construction generalizes the classical Bloom filter, an ingenious hashing scheme heavily used in networks and databases, whose main attribute---space efficiency---is achieved at the expense of a tiny false-positive rate. Whereas Bloom filters can handle only set membership queries, our Bloomier filters can deal with arbitrary functions. We give several designs varying in simplicity and optimality, and we provide lower bounds to prove the (near) optimality of our constructions.},
	urldate = {2018-05-02},
	file = {ACM Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\W2XEKS9Z\\Chazelle et al. - 2004 - The Bloomier Filter An Efficient Data Structure f.pdf:application/pdf},
}

@inproceedings{hua_multi-attribute_2006,
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {A {Multi}-attribute {Data} {Structure} with {Parallel} {Bloom} {Filters} for {Network} {Services}},
	isbn = {978-3-540-68039-0 978-3-540-68040-6},
	doi = {10.1007/11945918_30},
	abstract = {A Bloom filter has been widely utilized to represent a set of items because it is a simple space-efficient randomized data structure. In this paper, we propose a new structure to support the representation of items with multiple attributes based on Bloom filters. The structure is composed of Parallel Bloom Filters (PBF) and a hash table to support the accurate and efficient representation and query of items. The PBF is a counter-based matrix and consists of multiple submatrixes. Each submatrix can store one attribute of an item. The hash table as an auxiliary structure captures a verification value of an item, which can reflect the inherent dependency of all attributes for the item. Because the correct query of an item with multiple attributes becomes complicated, we use a two-step verification process to ensure the presence of a particular item to reduce false positive probability.},
	language = {en},
	urldate = {2018-05-02},
	booktitle = {High {Performance} {Computing} - {HiPC} 2006},
	publisher = {Springer},
	address = {Berlin, Heidelberg},
	author = {Hua, Yu and Xiao, Bin},
	month = dec,
	year = {2006},
	keywords = {bloom filter},
	pages = {277--288},
	file = {Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\4ITD4VH5\\Hua and Xiao - 2006 - A Multi-attribute Data Structure with Parallel Blo.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\XXYCPPX6\\11945918_30.html:text/html}
}

@article{xiao_using_2010,
	author = {Xiao, B. and Hua, Y.},
	title = {Using {Parallel} {Bloom} {Filters} for {Multiattribute} {Representation} on {Network} {Services}},
	journal = {IEEE Transactions on Parallel and Distributed Systems},
	volume = {21},
	number = {1},
	month = jan,
	year = {2010},
	pages = {20--32},
	issn = {1045-9219},
	doi = {10.1109/TPDS.2009.39},
	keywords = {file organisation, bloom filter, probability, hash table, information filtering, query processing, data structure., false positives, multiattribute items, multiattribute representation, network service, Network services, parallel Bloom filter, parallel Bloom filters, parallel processing, PBF-BF structure, PBF-HT structure, query service},
	abstract = {One widely used mechanism for representing membership of a set of items is the simple space-efficient randomized data structure known as Bloom filters. Yet, Bloom filters are not entirely suitable for many new network applications that support network services like the representation and querying of items that have multiple attributes as opposed to a single attribute. In this paper, we present an approach to the accurate and efficient representation and querying of multiattribute items using Bloom filters. The approach proposes three variant structures of Bloom filters: parallel Bloom filter (referred as PBF) structure, PBF with a hash table (PBF-HT), and PBF with a Bloom filter (PBF-BF). PBF stores multiple attributes of an item in parallel Bloom filters. The auxiliary HT and BF provide functions to capture the inherent dependency of all attributes of an item. Compared to standard Bloom filters to represent items with multiple attributes, the proposed PBF facilitates much faster query service and both PBF-HT and PBF-BF structures achieve much lower false positive probability with a result to save storage space. Simulation and experimental results demonstrate that the new space-efficient Bloom filter structures can efficiently and accurately represent multiattribute items and quickly respond queries at the cost of a relatively small false positive probability.},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\WE45943J\\4798158.html:text/html;IEEE Xplore Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\2VINUX96\\Xiao and Hua - 2010 - Using Parallel Bloom Filters for Multiattribute Re.pdf:application/pdf},
}

@inproceedings{porat_optimal_2009,
	series = {Lecture {Notes} in {Computer} {Science}},
	title = {An {Optimal} {Bloom} {Filter} {Replacement} {Based} on {Matrix} {Solving}},
	isbn = {978-3-642-03350-6 978-3-642-03351-3},
	doi = {10.1007/978-3-642-03351-3_25},
	abstract = {We suggest a method for holding a dictionary data structure, which maps keys to values, in the spirit of Bloom Filters. The space requirements of the dictionary we suggest are much smaller than those of a hashtable. We allow storing n keys, each mapped to value which is a string of k bits. Our suggested method requires nk + o(n) bits space to store the dictionary, and O(n) time to produce the data structure, and allows answering a membership query in O(1) memory probes. The dictionary size does not depend on the size of the keys. However, reducing the space requirements of the data structure comes at a certain cost. Our dictionary has a small probability of a one sided error. When attempting to obtain the value for a key that is stored in the dictionary we always get the correct answer. However, when testing for membership of an element that is not stored in the dictionary, we may get an incorrect answer, and when requesting the value of such an element we may get a certain random value. Our method is based on solving equations in GF(2 k ) and using several hash functions.Another significant advantage of our suggested method is that we do not require using sophisticated hash functions. We only require pairwise independent hash functions. We also suggest a data structure that requires only nk bits space, has O(n 2) preprocessing time, and has a O(logn) query time. However, this data structures requires a uniform hash functions.In order replace a Bloom Filter of n elements with an error proability of 2− k , we require nk + o(n) memory bits, O(1) query time, O(n) preprocessing time, and only pairwise independent hash function. Even the most advanced previously known Bloom Filter would require nk + O(n) space, and a uniform hash functions, so our method is significantly less space consuming especially when k is small.Our suggested dictionary can replace Bloom Filters, and has many applications. 
A few application examples are dictionaries for storing bad passwords, differential files in databases, Internet caching and distributed storage systems.},
	language = {en},
	urldate = {2018-05-02},
	booktitle = {Computer {Science} - {Theory} and {Applications}},
	publisher = {Springer},
	address = {Berlin, Heidelberg},
	author = {Porat, Ely},
	month = aug,
	year = {2009},
	keywords = {bloom filter},
	pages = {263--273},
	file = {Full Text PDF:C\:\\Users\\zzy\\Zotero\\storage\\59CUD36K\\Porat - 2009 - An Optimal Bloom Filter Replacement Based on Matri.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\IZEH4WJD\\978-3-642-03351-3_25.html:text/html}
}

@article{kubiatowicz_oceanstore:_2000,
	title = {{OceanStore}: {An} architecture for global-scale persistent storage},
	volume = {34},
	shorttitle = {{OceanStore}},
	number = {5},
	journal = {ACM SIGOPS Operating Systems Review},
	author = {Kubiatowicz, John and Bindel, David and Chen, Yan and Czerwinski, Steven and Eaton, Patrick and Geels, Dennis and Gummadi, Ramakrishna and Rhea, Sean and Weatherspoon, Hakim and Wells, Chris},
	year = {2000},
	pages = {190--201},
	file = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\MYLUXEBB\\Kubiatowicz et al. - 2000 - Oceanstore An architecture for global-scale persi.pdf:application/pdf}
}

@inproceedings{dillinger_bloom_2004,
	author = {Dillinger, Peter C. and Manolios, Panagiotis},
	title = {Bloom {Filters} in {Probabilistic} {Verification}},
	booktitle = {International {Conference} on {Formal} {Methods} in {Computer}-{Aided} {Design}},
	publisher = {Springer},
	year = {2004},
	pages = {367--381},
	keywords = {bloom filter},
	file = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\KPIJ73J9\\Dillinger and Manolios - 2004 - Bloom filters in probabilistic verification.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\3R6PWU3G\\978-3-540-30494-4_26.html:text/html},
}

@inproceedings{deng_approximately_2006,
	title = {Approximately detecting duplicates for streaming data using stable {Bloom} filters},
	booktitle = {Proceedings of the 2006 {ACM} {SIGMOD} international conference on {Management} of data},
	publisher = {ACM},
	author = {Deng, Fan and Rafiei, Davood},
	year = {2006},
	keywords = {bloom filter},
	pages = {25--36},
	file = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\JALK97XD\\Deng and Rafiei - 2006 - Approximately detecting duplicates for streaming d.pdf:application/pdf}
}

@inproceedings{kirsch_distance-sensitive_2006,
	title = {Distance-sensitive {Bloom} filters},
	booktitle = {2006 {Proceedings} of the {Eighth} {Workshop} on {Algorithm} {Engineering} and {Experiments} ({ALENEX})},
	publisher = {SIAM},
	author = {Kirsch, Adam and Mitzenmacher, Michael},
	year = {2006},
	keywords = {bloom filter},
	pages = {41--50},
	file = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\QPHGBQCX\\Kirsch and Mitzenmacher - 2006 - Distance-sensitive bloom filters.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\SNHDRL8X\\1.9781611972863.html:text/html}
}

@techreport{laufer_generalized_2005,
	title = {Generalized {Bloom} filters},
	number = {GTA-05-43},
	institution = {Electrical Engineering Program, COPPE/UFRJ},
	author = {Laufer, Rafael P. and Velloso, Pedro B. and Duarte, O. C. M. B.},
	year = {2005},
	keywords = {bloom filter},
	file = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\QN3WUP7V\\Laufer et al. - 2005 - Generalized bloom filters.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\APDXXVEV\\Laufer et al. - 2005 - Generalized bloom filters.pdf:application/pdf}
}

@inproceedings{guo_theory_2006,
	title = {Theory and network applications of dynamic {Bloom} filters},
	booktitle = {{INFOCOM} 2006. 25th {IEEE} {International} {Conference} on {Computer} {Communications}. {Proceedings}},
	publisher = {IEEE},
	author = {Guo, Deke and Wu, Jie and Chen, Honghui and Luo, Xueshan},
	year = {2006},
	keywords = {bloom filter},
	pages = {1--12},
	file = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\7YFSNR5J\\Guo et al. - 2006 - Theory and network applications of dynamic bloom f.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\8EIXQM8Z\\Guo et al. - 2006 - Theory and network applications of dynamic bloom f.pdf:application/pdf}
}

@inproceedings{faloutsos_power-law_1999,
	title = {On power-law relationships of the {Internet} topology},
	volume = {29},
	booktitle = {{ACM} {SIGCOMM} {Computer} {Communication} {Review}},
	publisher = {ACM},
	author = {Faloutsos, Michalis and Faloutsos, Petros and Faloutsos, Christos},
	year = {1999},
	pages = {251--262}
}

@inproceedings{hao_incremental_2008,
	title = {Incremental {Bloom} filters},
	booktitle = {{INFOCOM} 2008. {The} 27th {Conference} on {Computer} {Communications}. {IEEE}},
	publisher = {IEEE},
	author = {Hao, Fang and Kodialam, Murali and Lakshman, T. V.},
	year = {2008},
	keywords = {bloom filter},
	pages = {1067--1075},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\RPYWK9K5\\4509756.html:text/html}
}

@inproceedings{wang_i-dbf:_2006,
	author = {Wang, J. and Xiao, M. and Jiang, J. and Min, B.},
	title = {i-{DBF}: an {Improved} {Bloom} {Filter} {Representation} {Method} on {Dynamic} {Set}},
	shorttitle = {i-{DBF}},
	booktitle = {2006 {Fifth} {International} {Conference} on {Grid} and {Cooperative} {Computing} {Workshops}},
	month = oct,
	year = {2006},
	pages = {156--162},
	doi = {10.1109/GCCW.2006.53},
	keywords = {Bandwidth, Protocols, data structures, Data structures, Information filtering, bloom filter, Costs, Computer networks, Peer to peer computing, probability, Information filters, false positive probability, approximation theory, bloom filter representation, Grid computing, i-DBF, Laboratories, membership queries approximation, randomized data structure},
	abstract = {Bloom filter is a simple space-efficient randomized data structure for representing a set in order to support membership queries, which uses an m-bit array to represent a data set. Dynamic bloom filter (DBF) can support concisely representation and approximate membership queries of dynamic set instead of static set. It has been proved that DBF not only possess the advantage of standard bloom filter, but also has better features when dealing with dynamic set. But DBF also has a disadvantage: the addition operation which mapped element x into bloom filter s will become no sense, if some of the first s-1 bloom filters have already responded that element x is in set A with some false positive probability. We point out this shortcoming and improve the addition operation with a new algorithm. We call this improved dynamic bloom filter i-DBF. Finally, we prove that this i-DBF has better performance both in the storage space and in the false positive probability},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\32NIHP3I\\4031546.html:text/html;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\PN9HZTTH\\4031546.html:text/html;Wang et al_2006_i-DBF.pdf:C\:\\Users\\zzy\\Zotero\\storage\\ARM4EMFR\\Wang et al_2006_i-DBF.pdf:application/pdf},
}

@article{papapetrou_cardinality_2010,
	title = {Cardinality estimation and dynamic length adaptation for {Bloom} filters},
	volume = {28},
	issn = {0926-8782, 1573-7578},
	doi = {10.1007/s10619-010-7067-2},
	abstract = {Bloom filters are extensively used in distributed applications, especially in distributed databases and distributed information systems, to reduce network requirements and to increase performance. In this work, we propose two novel Bloom filter features that are important for distributed databases and information systems. First, we present a new approach to encode a Bloom filter such that its length can be adapted to the cardinality of the set it represents, with negligible overhead with respect to computation and false positive probability. The proposed encoding allows for significant network savings in distributed databases, as it enables the participating nodes to optimize the length of each Bloom filter before sending it over the network, for example, when executing Bloom joins. Second, we show how to estimate the number of distinct elements in a Bloom filter, for situations where the represented set is not materialized. These situations frequently arise in distributed databases, where estimating the cardinality of the represented sets is necessary for constructing an efficient query plan. The estimation is highly accurate and comes with tight probabilistic bounds. For both features we provide a thorough probabilistic analysis and extensive experimental evaluation which confirm the effectiveness of our approaches.},
	language = {en},
	number = {2-3},
	urldate = {2018-05-04},
	journal = {Distributed and Parallel Databases},
	author = {Papapetrou, Odysseas and Siberski, Wolf and Nejdl, Wolfgang},
	month = dec,
	year = {2010},
	pages = {119--156},
	file = {Papapetrou et al_2010_Cardinality estimation and dynamic length adaptation for Bloom filters.pdf:C\:\\Users\\zzy\\Zotero\\storage\\8372QPVG\\Papapetrou et al_2010_Cardinality estimation and dynamic length adaptation for Bloom filters.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\W3HRBT3R\\s10619-010-7067-2.html:text/html}
}

@article{hao_fast_2012,
	title = {Fast {Dynamic} {Multiple}-set {Membership} {Testing} {Using} {Combinatorial} {Bloom} {Filters}},
	volume = {20},
	issn = {1063-6692},
	doi = {10.1109/TNET.2011.2173351},
	abstract = {In this paper, we consider the problem of designing a data structure that can perform fast multiple-set membership testing in deterministic time. Our primary goal is to develop a hardware implementation of the data structure that uses only embedded memory blocks. Prior efforts to solve this problem involve hashing into multiple Bloom filters. Such approach needs a priori knowledge of the number of elements in each set in order to size the Bloom filter. We use a single-Bloom-filter-based approach and use multiple sets of hash functions to code for the set (group) id. Since a single Bloom filter is used, it does not need a priori knowledge of the distribution of the elements across the different sets. We show how to improve the performance of the data structure by using constant-weight error-correcting codes for coding the group id. Using error-correcting codes improves the performance of these data structures especially when there are a large number of sets. We also outline an efficient hardware-based approach to generate the large number of hash functions that we need for this data structure. The resulting data structure, COMB, is amenable to a variety of time-critical network applications.},
	number = {1},
	urldate = {2018-05-04},
	journal = {IEEE/ACM Transactions on Networking},
	author = {Hao, Fang and Kodialam, Murali and Lakshman, T. V. and Song, Haoyu},
	month = feb,
	year = {2012},
	keywords = {computer networks, data structure, bloom filter},
	pages = {295--304},
	file = {Hao et al_2012_Fast Dynamic Multiple-set Membership Testing Using Combinatorial Bloom Filters.pdf:C\:\\Users\\zzy\\Zotero\\storage\\XYCZSND7\\Hao et al_2012_Fast Dynamic Multiple-set Membership Testing Using Combinatorial Bloom Filters.pdf:application/pdf}
}

@manual{botta_d-itg_2013,
	title = {D-{ITG} 2.8.1 {Manual}},
	language = {en},
	author = {Botta, Alessio and Avallone, Stefano and Pescap{\'e}, Antonio},
	year = {2013},
	file = {Botta et al. - COMICS (COMputer for Interaction and Communication.pdf:C\:\\Users\\zzy\\Zotero\\storage\\45GRI54V\\Botta et al. - COMICS (COMputer for Interaction and Communication.pdf:application/pdf}
}

@manual{noauthor_open_2018,
	title = {Open {vSwitch} {Documentation}},
	url = {https://media.readthedocs.org/pdf/openvswitch/latest/openvswitch.pdf},
	urldate = {2018-05-06},
	year = {2018},
	file = {openvswitch.pdf:C\:\\Users\\zzy\\Zotero\\storage\\T4T78T7N\\openvswitch.pdf:application/pdf}
}

@inproceedings{shafer_axon:_2010,
	author = {Shafer, Jeffrey and Stephens, Brent and Foss, Michael and Rixner, Scott and Cox, Alan L.},
	title = {Axon: {A} flexible substrate for source-routed {Ethernet}},
	shorttitle = {Axon},
	booktitle = {Proceedings of the 6th {ACM}/{IEEE} {Symposium} on {Architectures} for {Networking} and {Communications} {Systems}},
	publisher = {ACM},
	year = {2010},
	pages = {22},
	file = {Shafer et al_2010_Axon.pdf:C\:\\Users\\zzy\\Zotero\\storage\\NMD2PLUS\\Shafer et al_2010_Axon.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\DM29IFBK\\citation.html:text/html;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\W7U7NSXB\\citation.html:text/html},
}

@manual{noauthor_p4_16_2017,
	title = {The {P}4\_16 {Language} {Specification}},
	url = {https://p4.org/p4-spec/docs/P4-16-v1.0.0-spec.pdf},
	urldate = {2018-05-23},
	year = {2017},
	file = {P4-16-v1.0.0-spec.pdf:C\:\\Users\\zzy\\Zotero\\storage\\Z397G63Z\\P4-16-v1.0.0-spec.pdf:application/pdf}
}

@manual{noauthor_p4_16_2018,
	title = {P4\_16 {Portable} {Switch} {Architecture} ({PSA})},
	url = {https://p4.org/p4-spec/docs/PSA-v1.0.0.pdf},
	urldate = {2018-05-23},
	year = {2018},
	file = {PSA-v1.0.0.pdf:C\:\\Users\\zzy\\Zotero\\storage\\RIZKQQGZ\\PSA-v1.0.0.pdf:application/pdf}
}

@manual{noauthor_p4_14_2017,
	title = {The {P}4\_14 {Language} {Specification}},
	url = {https://p4.org/p4-spec/p4-14/v1.0.4/tex/p4.pdf},
	urldate = {2018-05-23},
	year = {2017},
	file = {p4.pdf:C\:\\Users\\zzy\\Zotero\\storage\\CQDX2ABR\\p4.pdf:application/pdf}
}

@manual{noauthor_-band_2018,
	title = {In-band {Network} {Telemetry} ({INT}) {Dataplane} {Specification}},
	year = {2018},
	file = {INT.pdf:C\:\\Users\\zzy\\Zotero\\storage\\5CABVFD5\\INT.pdf:application/pdf}
}

@inproceedings{tune_towards_2008,
	series = {{IMC} '08},
	title = {Towards {Optimal} {Sampling} for {Flow} {Size} {Estimation}},
	isbn = {978-1-60558-334-1},
	doi = {10.1145/1452520.1452550},
	abstract = {The flow size distribution is a useful metric for traffic modeling and management. It is well known however that its estimation based on sampled data is problematic. Previous work has shown that flow sampling (FS) offers enormous statistical benefits over packet sampling, however it suffers from high resource requirements and is not currently used in routers. In this paper we present Dual Sampling, which can to a large extent provide flow-sampling-like statistical performance for packet-sampling-like computational cost. Our work is grounded in a Fisher information based approach recently used to evaluate a number of sampling schemes, excluding however FS, for TCP flows. We show how to revise and extend the approach to include FS as well as DS and others, and how to make rigorous and fair comparisons. We show how DS significantly outperforms other packet based methods, but also prove that DS is inferior to flow sampling. However, since DS is a two-parameter family of methods which includes FS as a special case, DS can be used to approach flow sampling continuously. We then describe a packet sampling based implementation of DS and analyze its key computational costs to show that router implementation is feasible. Our approach offers insights into many issues, including how the notions of 'flow quality' and 'packet gain' can be used to understand the relative performance of methods, and how the problem of optimal sampling can be formulated. Our work is theoretical with some simulation support and a case study on Internet data.},
	urldate = {2018-05-27},
	booktitle = {Proceedings of the 8th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Tune, Paul and Veitch, Darryl},
	year = {2008},
	keywords = {sampling, flow size distribution, fisher information, internet measurement, routers},
	pages = {243--256},
	file = {Tune_Veitch_2008_Towards Optimal Sampling for Flow Size Estimation.pdf:C\:\\Users\\zzy\\Zotero\\storage\\8H54XG9Q\\Tune_Veitch_2008_Towards Optimal Sampling for Flow Size Estimation.pdf:application/pdf}
}

@inproceedings{ribeiro_fisher_2006,
	series = {{IMC} '06},
	title = {Fisher {Information} of {Sampled} {Packets}: {An} {Application} to {Flow} {Size} {Estimation}},
	isbn = {978-1-59593-561-8},
	shorttitle = {Fisher {Information} of {Sampled} {Packets}},
	doi = {10.1145/1177080.1177083},
	abstract = {Packet sampling is widely used in network monitoring. Sampled packet streams are often used to determine flow-level statistics of network traffic. To date there is conflicting evidence on the quality of the resulting estimates. In this paper we take a systematic approach, using the Fisher information metric and the Cramér-Rao bound, to understand the contributions that different types of information within sampled packets have on the quality of flow-level estimates. We provide concrete evidence that, without protocol information and with packet sampling rate p = 0.005, any accurate unbiased estimator needs approximately 1016 sampled flows. The required number of sampled flows drops to roughly 104 with the use of TCP sequence numbers. Furthermore, additional SYN flag information significantly reduces the estimation error of short flows. We present a Maximum Likelihood Estimator (MLE) that relies on all of this information and show that it is efficient, even when applied to a small sample set. We validate our results using Tier-1 Internet backbone traces and evaluate the benefits of sampling from multiple monitors. Our results show that combining estimates from several monitors is 50\% less accurate than an estimate based on all samples.},
	urldate = {2018-05-27},
	booktitle = {Proceedings of the 6th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Ribeiro, Bruno and Towsley, Don and Ye, Tao and Bolot, Jean C.},
	year = {2006},
	keywords = {maximum likelihood estimation, sampling, efficient estimator, Fisher information, flow size distribution, probabilistic sampling},
	pages = {15--26},
	file = {Ribeiro et al_2006_Fisher Information of Sampled Packets.pdf:C\:\\Users\\zzy\\Zotero\\storage\\CSG2JKL4\\Ribeiro et al_2006_Fisher Information of Sampled Packets.pdf:application/pdf}
}

@article{mitzenmacher_power_2001,
	title = {The {Power} of {Two} {Choices} in {Randomized} {Load} {Balancing}},
	volume = {12},
	issn = {1045-9219},
	doi = {10.1109/71.963420},
	abstract = {We consider the following natural model: customers arrive as a Poisson stream of rate λn, λ{\textless}1, at a collection of n servers. Each customer chooses some constant d servers independently and uniformly at random from the n servers and waits for service at the one with the fewest customers. Customers are served according to the first-in first-out (FIFO) protocol and the service time for a customer is exponentially distributed with mean 1. We call this problem the supermarket model. We wish to know how the system behaves and in particular we are interested in the effect that the parameter d has on the expected time a customer spends in the system in equilibrium. Our approach uses a limiting, deterministic model representing the behavior as n→∞ to approximate the behavior of finite systems. The analysis of the deterministic model is interesting in its own right. Along with a theoretical justification of this approach, we provide simulations that demonstrate that the method accurately predicts system behavior, even for relatively small systems. Our analysis provides surprising implications. Having d=2 choices leads to exponential improvements in the expected time a customer spends in the system over d=1, whereas having d=3 choices is only a constant factor better than d=2. We discuss the possible implications for system design},
	number = {10},
	journal = {IEEE Transactions on Parallel and Distributed Systems},
	author = {Mitzenmacher, Michael},
	month = oct,
	year = {2001},
	keywords = {resource allocation, Protocols, Load management, distributed systems, Computer applications, deterministic model, Differential equations, distributed algorithms, exponential distribution, finite systems, first-in first-out protocol, H infinity control, limiting systems, Load modeling, Poisson stream, Predictive models, Queueing analysis, queueing theory, queuing theory, randomized load balancing, Resource management, service time, simulations, stochastic processes, supermarket model, System analysis and design, power of two choices},
	pages = {1094--1104},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\JVWFF6KA\\963420.html:text/html;Mitzenmacher_2001_The power of two choices in randomized load balancing.pdf:C\:\\Users\\zzy\\Zotero\\storage\\SN3VKDSV\\Mitzenmacher_2001_The power of two choices in randomized load balancing.pdf:application/pdf}
}

@inproceedings{byers_geometric_2004,
	author = {Byers, John W. and Considine, Jeffrey and Mitzenmacher, Michael},
	title = {Geometric {Generalizations} of the {Power} of {Two} {Choices}},
	booktitle = {Proceedings of the {Sixteenth} {Annual} {ACM} {Symposium} on {Parallelism} in {Algorithms} and {Architectures}},
	series = {{SPAA} '04},
	isbn = {978-1-58113-840-5},
	doi = {10.1145/1007912.1007921},
	abstract = {A well-known paradigm for load balancing in parallel and distributed systems is the "power of two choices," whereby an item is stored at the less loaded of two (or more) random alternative servers. We investigate the power of two choices in natural settings where items and servers reside in a geometric space and each item is associated with the server that is its nearest neighbor. This is the setting for example in the Chord distributed hash table, where the geometric space is determined by clockwise distance on a one-dimensional ring. For example, our analysis shows that when \$n\$ items are placed at n servers with d choices per item, the maximum load at any server is log log n/ log d + O(1) with high probability, only an additive constant more than when servers are chosen uniformly at random. Our proofs are quite general, showing that the power of two choices works under a variety of distributions, with most geometric constructions having at most an additive O(1) penalty. We also show that these techniques still work under highly unbalanced distributions, and give sharp bounds on the necessary number of choices. Finally, we provide simulation results demonstrating the load balance that results as the system size scales into the millions.},
	urldate = {2018-05-27},
	year = {2004},
	pages = {54--63},
	keywords = {distributed hash tables, load balancing, two choices, power of two choices},
	file = {Byers et al_2004_Geometric Generalizations of the Power of Two Choices.pdf:C\:\\Users\\zzy\\Zotero\\storage\\SYBZ7SW4\\Byers et al_2004_Geometric Generalizations of the Power of Two Choices.pdf:application/pdf}
}

@inproceedings{doerr_stabilizing_2011,
	author = {Doerr, Benjamin and Goldberg, Leslie Ann and Minder, Lorenz and Sauerwald, Thomas and Scheideler, Christian},
	title = {Stabilizing {Consensus} with the {Power} of {Two} {Choices}},
	booktitle = {Proceedings of the {Twenty}-third {Annual} {ACM} {Symposium} on {Parallelism} in {Algorithms} and {Architectures}},
	series = {{SPAA} '11},
	isbn = {978-1-4503-0743-7},
	doi = {10.1145/1989493.1989516},
	abstract = {In the standard consensus problem there are n processes with possibly different input values and the goal is to eventually reach a point at which all processes commit to exactly one of these values. We are studying a slight variant of the consensus problem called the stabilizing consensus problem [2]. In this problem, we do not require that each process commits to a final value at some point, but that eventually they arrive at a common, stable value without necessarily being aware of that. This should work irrespective of the states in which the processes are starting. Our main result is a simple randomized algorithm called median rule that, with high probability, just needs O(log m log log n + log n) time and work per process to arrive at an almost stable consensus for any set of m legal values as long as an adversary can corrupt the states of at most √n processes at any time. Without adversarial involvement, just O(log n) time and work is needed for a stable consensus, with high probability. As a by-product, we obtain a simple distributed algorithm for approximating the median of n numbers in time O(log m log log n + log n) under adversarial presence.},
	urldate = {2018-05-27},
	year = {2011},
	pages = {149--158},
	keywords = {distributed consensus, randomized algorithms, self-stabilization, power of two choices},
	file = {Doerr et al_2011_Stabilizing Consensus with the Power of Two Choices.pdf:C\:\\Users\\zzy\\Zotero\\storage\\XCVBQAK6\\Doerr et al_2011_Stabilizing Consensus with the Power of Two Choices.pdf:application/pdf}
}

@misc{noauthor_user_nodate,
	title = {User {Documentation} nfdump \& {NfSen}},
	internal-note = {TODO(review): no author, year, or URL recorded -- locate the canonical nfdump/{NfSen} documentation source and add url/urldate},
	file = {haag-peter-papers.pdf:C\:\\Users\\zzy\\Zotero\\storage\\E4A6JQMG\\haag-peter-papers.pdf:application/pdf}
}

@inproceedings{bonomi_bloom_2006,
	title = {Bloom {Filters} via {D}-{Left} {Hashing} and {Dynamic} {Bit} {Reassignment} {Extended} {Abstract}},
	booktitle = {Forty-{Fourth} {Annual} {Allerton} {Conf}., {Illinois}, {USA}},
	author = {Bonomi, Flavio and Mitzenmacher, Michael and Panigrahy, Rina and Singh, Sushil and Varghese, George},
	year = {2006},
	keywords = {bloom filter},
	pages = {877--883},
	file = {Bonomi et al_2006_Bloom filters via d-left hashing and dynamic bit reassignment extended abstract.pdf:C\:\\Users\\zzy\\Zotero\\storage\\WP3Z3EHT\\Bonomi et al_2006_Bloom filters via d-left hashing and dynamic bit reassignment extended abstract.pdf:application/pdf}
}

@inproceedings{cheng_catch_2014,
	title = {Catch the {Whole} {Lot} in an {Action}: {Rapid} {Precise} {Packet} {Loss} {Notification} in {Data} {Center}},
	shorttitle = {Catch the {Whole} {Lot} in an {Action}},
	booktitle = {{NSDI}},
	author = {Cheng, Peng and Ren, Fengyuan and Shu, Ran and Lin, Chuang},
	year = {2014},
	pages = {17--28},
	file = {Cheng et al_2014_Catch the Whole Lot in an Action.pdf:C\:\\Users\\zzy\\Zotero\\storage\\MNKMGNP7\\Cheng et al_2014_Catch the Whole Lot in an Action.pdf:application/pdf;Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\AW6RY8UP\\Cheng et al. - 2014 - Catch the Whole Lot in an Action Rapid Precise Pa.pdf:application/pdf}
}

@inproceedings{decandia_dynamo:_2007,
	title = {Dynamo: {Amazon}'s highly available key-value store},
	volume = {41},
	shorttitle = {Dynamo},
	booktitle = {{ACM} {SIGOPS} operating systems review},
	publisher = {ACM},
	author = {DeCandia, Giuseppe and Hastorun, Deniz and Jampani, Madan and Kakulapati, Gunavardhan and Lakshman, Avinash and Pilchin, Alex and Sivasubramanian, Swaminathan and Vosshall, Peter and Vogels, Werner},
	year = {2007},
	pages = {205--220},
	file = {DeCandia et al_2007_Dynamo.pdf:C\:\\Users\\zzy\\Zotero\\storage\\E7SAEW25\\DeCandia et al_2007_Dynamo.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\X46HA2W2\\citation.html:text/html}
}

@inproceedings{eppstein_whats_2011,
	author = {Eppstein, David and Goodrich, Michael T. and Uyeda, Frank and Varghese, George},
	title = {What's the {Difference}?: {Efficient} {Set} {Reconciliation} {Without} {Prior} {Context}},
	shorttitle = {What's the {Difference}?},
	booktitle = {Proceedings of the {ACM} {SIGCOMM} 2011 {Conference}},
	series = {{SIGCOMM} '11},
	publisher = {ACM},
	address = {New York, NY, USA},
	isbn = {978-1-4503-0797-0},
	doi = {10.1145/2018436.2018462},
	abstract = {We describe a synopsis structure, the Difference Digest, that allows two nodes to compute the elements belonging to the set difference in a single round with communication overhead proportional to the size of the difference times the logarithm of the keyspace. While set reconciliation can be done efficiently using logs, logs require overhead for every update and scale poorly when multiple users are to be reconciled. By contrast, our abstraction assumes no prior context and is useful in networking and distributed systems applications such as trading blocks in a peer-to-peer network, and synchronizing link-state databases after a partition. Our basic set-reconciliation method has a similarity with the peeling algorithm used in Tornado codes [6], which is not surprising, as there is an intimate connection between set difference and coding. Beyond set reconciliation, an essential component in our Difference Digest is a new estimator for the size of the set difference that outperforms min-wise sketches [3] for small set differences. Our experiments show that the Difference Digest is more efficient than prior approaches such as Approximate Reconciliation Trees [5] and Characteristic Polynomial Interpolation [17]. We use Difference Digests to implement a generic KeyDiff service in Linux that runs over TCP and returns the sets of keys that differ between machines.},
	urldate = {2018-06-01},
	year = {2011},
	pages = {218--229},
	keywords = {difference digest, invertible bloom filter, set difference},
	file = {Eppstein et al_2011_What's the Difference.pdf:C\:\\Users\\zzy\\Zotero\\storage\\YXD873QQ\\Eppstein et al_2011_What's the Difference.pdf:application/pdf}
}

@inproceedings{goodrich_invertible_2011,
	title = {Invertible {Bloom} lookup tables},
	booktitle = {Communication, {Control}, and {Computing} ({Allerton}), 2011 49th {Annual} {Allerton} {Conference} on},
	publisher = {IEEE},
	author = {Goodrich, Michael T. and Mitzenmacher, Michael},
	year = {2011},
	pages = {792--799},
	file = {Goodrich_Mitzenmacher_2011_Invertible bloom lookup tables.pdf:C\:\\Users\\zzy\\Zotero\\storage\\EHR5D923\\Goodrich_Mitzenmacher_2011_Invertible bloom lookup tables.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\RGUQBL6P\\6120248.html:text/html}
}

@article{kandula_dynamic_2007,
	author = {Kandula, Srikanth and Katabi, Dina and Sinha, Shantanu and Berger, Arthur},
	title = {Dynamic load balancing without packet reordering},
	journal = {ACM SIGCOMM Computer Communication Review},
	volume = {37},
	number = {2},
	year = {2007},
	pages = {51--62},
	file = {Kandula et al_2007_Dynamic load balancing without packet reordering.pdf:C\:\\Users\\zzy\\Zotero\\storage\\3QLB47C3\\Kandula et al_2007_Dynamic load balancing without packet reordering.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\6CJ95FP4\\citation.html:text/html}
}

@inproceedings{kompella_every_2009,
	author = {Kompella, Ramana Rao and Levchenko, Kirill and Snoeren, Alex C. and Varghese, George},
	title = {Every microsecond counts: tracking fine-grain latencies with a lossy difference aggregator},
	shorttitle = {Every microsecond counts},
	booktitle = {{ACM} {SIGCOMM} {Computer} {Communication} {Review}},
	volume = {39},
	publisher = {ACM},
	year = {2009},
	pages = {255--266},
	file = {Kompella et al_2009_Every microsecond counts.pdf:C\:\\Users\\zzy\\Zotero\\storage\\NI4BQW2Q\\Kompella et al. - 2009 - Every microsecond counts tracking fine-grain late.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\BHLCUDAZ\\citation.html:text/html}
}

@inproceedings{mai_is_2006,
	series = {{IMC} '06},
	title = {Is sampled data sufficient for anomaly detection?},
	booktitle = {Proceedings of the 6th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	publisher = {ACM},
	author = {Mai, Jianning and Chuah, Chen-Nee and Sridharan, Ashwin and Ye, Tao and Zang, Hui},
	year = {2006},
	pages = {165--176},
	file = {Mai et al_2006_Is sampled data sufficient for anomaly detection.pdf:C\:\\Users\\zzy\\Zotero\\storage\\YREWWDF5\\Mai et al_2006_Is sampled data sufficient for anomaly detection.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\BQCC94HW\\citation.html:text/html}
}

@inproceedings{gember-jacobson_opennf:_2014,
	author = {Gember-Jacobson, Aaron and Viswanathan, Raajay and Prakash, Chaithan and Grandl, Robert and Khalid, Junaid and Das, Sourav and Akella, Aditya},
	title = {{OpenNF}: {Enabling} innovation in network function control},
	shorttitle = {{OpenNF}},
	booktitle = {{ACM} {SIGCOMM} {Computer} {Communication} {Review}},
	volume = {44},
	publisher = {ACM},
	year = {2014},
	pages = {163--174},
	file = {Gember-Jacobson et al_2014_OpenNF.pdf:C\:\\Users\\zzy\\Zotero\\storage\\TPLZ4DHW\\Gember-Jacobson et al_2014_OpenNF.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\Y9A2IDFI\\citation.html:text/html}
}

@article{mckeown_openflow:_2008,
	author = {McKeown, Nick and Anderson, Tom and Balakrishnan, Hari and Parulkar, Guru and Peterson, Larry and Rexford, Jennifer and Shenker, Scott and Turner, Jonathan},
	title = {{OpenFlow}: enabling innovation in campus networks},
	shorttitle = {{OpenFlow}},
	journal = {ACM SIGCOMM Computer Communication Review},
	volume = {38},
	number = {2},
	year = {2008},
	pages = {69--74},
	file = {McKeown et al_2008_OpenFlow.pdf:C\:\\Users\\zzy\\Zotero\\storage\\WR6M29W8\\McKeown et al_2008_OpenFlow.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\FLH33UFB\\citation.html:text/html}
}

@inproceedings{yu_profiling_2011,
	title = {Profiling {Network} {Performance} for {Multi}-tier {Data} {Center} {Applications}},
	volume = {11},
	booktitle = {{NSDI}},
	author = {Yu, Minlan and Greenberg, Albert G. and Maltz, David A. and Rexford, Jennifer and Yuan, Lihua and Kandula, Srikanth and Kim, Changhoon},
	year = {2011},
	pages = {5--5},
	file = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\CQNMAV78\\Yu et al. - 2011 - Profiling Network Performance for Multi-tier Data .pdf:application/pdf;Yu et al_2011_Profiling Network Performance for Multi-tier Data Center Applications.pdf:C\:\\Users\\zzy\\Zotero\\storage\\AFLNQ37S\\Yu et al_2011_Profiling Network Performance for Multi-tier Data Center Applications.pdf:application/pdf}
}

@article{pagh_cuckoo_2004,
	author = {Pagh, Rasmus and Rodler, Flemming Friche},
	title = {Cuckoo {Hashing}},
	journal = {Journal of Algorithms},
	volume = {51},
	number = {2},
	issn = {0196-6774},
	doi = {10.1016/j.jalgor.2003.12.002},
	abstract = {We present a simple dictionary with worst case constant lookup time, equaling the theoretical performance of the classic dynamic perfect hashing scheme of Dietzfelbinger et al. [SIAM J. Comput. 23 (4) (1994) 738–761]. The space usage is similar to that of binary search trees. Besides being conceptually much simpler than previous dynamic dictionaries with worst case constant lookup time, our data structure is interesting in that it does not use perfect hashing, but rather a variant of open addressing where keys can be moved back in their probe sequences. An implementation inspired by our algorithm, but using weaker hash functions, is found to be quite practical. It is competitive with the best known dictionaries having an average case (but no nontrivial worst case) guarantee on lookup time.},
	urldate = {2018-06-01},
	month = may,
	year = {2004},
	pages = {122--144},
	keywords = {Information retrieval, Data structures, Dictionaries, Experiments, Hashing, Searching},
	file = {Pagh_Rodler_2004_Cuckoo hashing.pdf:C\:\\Users\\zzy\\Zotero\\storage\\NTY3P34C\\Pagh_Rodler_2004_Cuckoo hashing.pdf:application/pdf;ScienceDirect Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\Z2HZCLYE\\S0196677403001925.html:text/html}
}

@article{mitzenmacher_using_2007,
	title = {Using the {Power} of {Two} {Choices} to {Improve} {Bloom} {Filters}},
	volume = {4},
	issn = {1944-9488},
	doi = {10.1080/15427951.2007.10129136},
	abstract = {We consider the combination of two ideas from the hashing literature: the power of two choices and Bloom filters.},
	language = {en},
	number = {1},
	urldate = {2018-06-03},
	journal = {Internet Mathematics},
	author = {Mitzenmacher, Michael and Lumetta, Steve},
	month = jan,
	year = {2007},
	keywords = {sketch and streaming algorithm, power of two choices},
	pages = {17--33},
	internal-note = {Fixed pages: {1433} was the publisher page id (cf. the 1433.html snapshot), not a page range; removed an unrelated Desktop attachment (Elastic Sketch sigcomm18 pdf) from the file field; trimmed scraper boilerplate from the abstract},
	file = {Mitzenmacher_Lumetta_2007_Using the Power of Two Choices to Improve Bloom Filters.pdf:C\:\\Users\\zzy\\Zotero\\storage\\WZBW4GMY\\Mitzenmacher_Lumetta_2007_Using the Power of Two Choices to Improve Bloom Filters.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\H4U4JDX2\\1433.html:text/html}
}

@inproceedings{manku_approximate_2002,
	author = {Manku, Gurmeet Singh and Motwani, Rajeev},
	title = {Approximate frequency counts over data streams},
	booktitle = {{VLDB}'02: {Proceedings} of the 28th {International} {Conference} on {Very} {Large} {Databases}},
	publisher = {Elsevier},
	year = {2002},
	pages = {346--357},
	keywords = {sketch and streaming algorithm},
	file = {Manku_Motwani_2002_Approximate frequency counts over data streams.pdf:C\:\\Users\\zzy\\Zotero\\storage\\2EBN6WZC\\Manku_Motwani_2002_Approximate frequency counts over data streams.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\YI9LALPC\\B978155860869650038X.html:text/html}
}

@inproceedings{zhang_online_2004,
	author = {Zhang, Yin and Singh, Sumeet and Sen, Subhabrata and Duffield, Nick and Lund, Carsten},
	title = {Online {Identification} of {Hierarchical} {Heavy} {Hitters}: {Algorithms}, {Evaluation}, and {Applications}},
	shorttitle = {Online {Identification} of {Hierarchical} {Heavy} {Hitters}},
	booktitle = {Proceedings of the 4th {ACM} {SIGCOMM} {Conference} on {Internet} {Measurement}},
	series = {{IMC} '04},
	publisher = {ACM},
	address = {New York, NY, USA},
	isbn = {978-1-58113-821-4},
	doi = {10.1145/1028788.1028802},
	abstract = {In traffic monitoring, accounting, and network anomaly detection, it is often important to be able to detect high-volume traffic clusters in near real-time. Such heavy-hitter traffic clusters are often hierarchical (\textit{ie}, they may occur at different aggregation levels like ranges of IP addresses) and possibly multidimensional (\textit{ie}, they may involve the combination of different IP header fields like IP addresses, port numbers, and protocol). Without prior knowledge about the precise structures of such traffic clusters, a naive approach would require the monitoring system to examine all possible ombinations of aggregates in order to detect the heavy hitters, which can be proohibitive in terms of computation resources. In this paper, we focus on online identification of 1-dimensional and 2-dimensional hierarchical heavy hitters (HHHs), arguably the two most important scenarios in traffic analysis. We show that the problem of HHH detection can be transformed to one of dynamic packet classification by taking a top-down approach and adaptively creating new rules to match HHHs. We then adapt several existing static packet classification algorithms to support dynamic packet classification. The resulting HHH detection algorithms have much lower worst-case update costs than existing algorithms and can provide tunable deterministic accuracy guarantees. As an application of these algorithms, we also propose robust techniques to detect changes among heavy-hitter traffic clusters. Our techniques can accommodate variability due to sampling that is increasingly used in network measurement. Evaluation based on real Internet traces collected at a Tier-1 ISP suggests that these techniques are remarkably accurate and efficient.},
	urldate = {2018-06-06},
	year = {2004},
	pages = {101--114},
	keywords = {change detection, network anomaly detection, data stream computation, hierarchical heavy hitters, packet classification},
	file = {Zhang et al_2004_Online Identification of Hierarchical Heavy Hitters.pdf:C\:\\Users\\zzy\\Zotero\\storage\\P8U94UZT\\Zhang et al_2004_Online Identification of Hierarchical Heavy Hitters.pdf:application/pdf}
}

@inproceedings{popescu_enabling_2017,
	author = {Popescu, Diana Andreea and Antichi, Gianni and Moore, Andrew W.},
	title = {Enabling {Fast} {Hierarchical} {Heavy} {Hitter} {Detection} {Using} {Programmable} {Data} {Planes}},
	booktitle = {Proceedings of the {Symposium} on {SDN} {Research}},
	series = {{SOSR} '17},
	publisher = {ACM},
	address = {New York, NY, USA},
	isbn = {978-1-4503-4947-5},
	doi = {10.1145/3050220.3060606},
	abstract = {Measuring and monitoring network traffic is a fundamental aspect in network management. This poster is a first step towards an SDN solution using an event triggered approach to support advanced monitoring dataplane capabilities. Leveraging P4 programmability, we built a solution to inform a remote controller about the detected hierarchical heavy hitters, thus minimizing control plane overheads.},
	urldate = {2018-06-06},
	year = {2017},
	pages = {191--192},
	keywords = {Hierarchical Heavy Hitters, P4, SDN},
	file = {Popescu et al_2017_Enabling Fast Hierarchical Heavy Hitter Detection Using Programmable Data Planes.pdf:C\:\\Users\\zzy\\Zotero\\storage\\9Z5XEDCC\\Popescu et al_2017_Enabling Fast Hierarchical Heavy Hitter Detection Using Programmable Data Planes.pdf:application/pdf}
}

@inproceedings{metwally_efficient_2005,
	author = {Metwally, Ahmed and Agrawal, Divyakant and El Abbadi, Amr},
	title = {Efficient computation of frequent and top-k elements in data streams},
	booktitle = {International {Conference} on {Database} {Theory}},
	publisher = {Springer},
	year = {2005},
	pages = {398--412},
	file = {Metwally et al_2005_Efficient computation of frequent and top-k elements in data streams.pdf:C\:\\Users\\zzy\\Zotero\\storage\\S2IWGKVR\\Metwally et al_2005_Efficient computation of frequent and top-k elements in data streams.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\CFK9ETPI\\978-3-540-30570-5_27.html:text/html}
}

@inproceedings{yang_elastic_2018,
	title = {Elastic {Sketch}: {Adaptive} and {Fast} {Network}-{Wide} {Measurements}},
	booktitle = {Proceedings of the {Conference} of the {ACM} {Special} {Interest} {Group} on {Data} {Communication}},
	publisher = {ACM},
	author = {Yang, Tong and Jiang, Jie and Liu, Peng and Huang, Qun and Gong, Junzhi and Zhou, Yang and Miao, Rui and Li, Xiaoming and Uhlig, Steve},
	year = {2018},
	pages = {561--575},
	internal-note = {Removed stray blank line inside the entry; pages = {14} was a page count, replaced with the SIGCOMM '18 proceedings range -- verify against the ACM DL record},
	file = {_.pdf:C\:\\Users\\zzy\\Zotero\\storage\\PKVRPQNZ\\_.pdf:application/pdf}
}

@inproceedings{sonchack_turboflow:_2018,
	author = {Sonchack, John and Aviv, Adam J. and Keller, Eric and Smith, Jonathan M.},
	title = {Turboflow: {Information} {Rich} {Flow} {Record} {Generation} on {Commodity} {Switches}},
	shorttitle = {Turboflow},
	booktitle = {Proceedings of the {Thirteenth} {EuroSys} {Conference}},
	series = {{EuroSys} '18},
	isbn = {978-1-4503-5584-1},
	doi = {10.1145/3190508.3190558},
	abstract = {Fine-grained traffic flow records enable many powerful applications, especially in combination with telemetry systems that supports high coverage, i.e., of every link and at all times. Current solutions, however, make undesirable trade-offs between infrastructure cost and information richness. Switches that generate flow records, e.g., NetFlow switches, are a low cost solution but current designs sacrifice information richness, e.g., by sampling. Information rich alternatives rely heavily on servers, which increases cost to the point that they are impractical for high coverage. In this paper, we present the design, implementation, and evaluation of TurboFlow, a flow record generator for programmable switches that does not compromise on either cost or information richness. TurboFlow produces fine- grained and unsampled flow records with custom features entirely at the switch without relying on any support from external servers. This is a challenge given high traffic rates and the limitations of switch hardware. To overcome, we decompose the flow record generation algorithm and optimize it for the heterogeneous processors in programmable switches. We show that with this design, TurboFlow can support multi-terabit workloads on readily available commodity switches to enable information rich monitoring with high coverage.},
	urldate = {2018-06-17},
	year = {2018},
	pages = {11:1--11:16},
	keywords = {network monitoring, netflow, P4, programmable switch hardware},
	file = {Sonchack et al_2018_Turboflow.pdf:C\:\\Users\\zzy\\Zotero\\storage\\37759HQH\\Sonchack et al_2018_Turboflow.pdf:application/pdf}
}

@article{whang_linear-time_1990,
	author = {Whang, Kyu-Young and Vander-Zanden, Brad T. and Taylor, Howard M.},
	title = {A {Linear}-{Time} {Probabilistic} {Counting} {Algorithm} for {Database} {Applications}},
	journal = {ACM Transactions on Database Systems},
	volume = {15},
	number = {2},
	year = {1990},
	pages = {208--229},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\I8KC3GVG\\citation.html:text/html;Whang et al_1990_A linear-time probabilistic counting algorithm for database applications.pdf:C\:\\Users\\zzy\\Zotero\\storage\\7QTVCILF\\Whang et al_1990_A linear-time probabilistic counting algorithm for database applications.pdf:application/pdf}
}

@misc{noauthor_barefoot_nodate,
	title = {Barefoot {Tofino}: {World}'s {Fastest} {P}4-{Programmable} {Ethernet} {Switch} {ASICs}},
	url = {https://barefootnetworks.com/},
	urldate = {2018-06-19},
	howpublished = {Barefoot Networks website},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\KD4S3A5G\\brief-tofino.html:text/html}
}

@article{bosshart_p4:_2014,
	author = {Bosshart, Pat and Daly, Dan and Gibb, Glen and Izzard, Martin and McKeown, Nick and Rexford, Jennifer and Schlesinger, Cole and Talayco, Dan and Vahdat, Amin and Varghese, George and Walker, David},
	title = {P4: {Programming} {Protocol}-independent {Packet} {Processors}},
	shorttitle = {P4},
	journal = {SIGCOMM Comput. Commun. Rev.},
	volume = {44},
	number = {3},
	pages = {87--95},
	month = jul,
	year = {2014},
	issn = {0146-4833},
	doi = {10.1145/2656877.2656890},
	abstract = {P4 is a high-level language for programming protocol-independent packet processors. P4 works in conjunction with SDN control protocols like OpenFlow. In its current form, OpenFlow explicitly specifies protocol headers on which it operates. This set has grown from 12 to 41 fields in a few years, increasing the complexity of the specification while still not providing the flexibility to add new headers. In this paper we propose P4 as a strawman proposal for how OpenFlow should evolve in the future. We have three goals: (1) Reconfigurability in the field: Programmers should be able to change the way switches process packets once they are deployed. (2) Protocol independence: Switches should not be tied to any specific network protocols. (3) Target independence: Programmers should be able to describe packet-processing functionality independently of the specifics of the underlying hardware. As an example, we describe how to use P4 to configure a switch to add a new hierarchical label.},
	keywords = {p4, protocol-independent, reconfigurability, sdn},
	urldate = {2018-06-19},
	file = {Bosshart et al_2014_P4.pdf:C\:\\Users\\zzy\\Zotero\\storage\\3AXFDMMA\\Bosshart et al_2014_P4.pdf:application/pdf}
}

@techreport{claise_cisco_2004,
	title = {Cisco {Systems} {NetFlow} {Services} {Export} {Version} 9},
	author = {Claise, B.},
	institution = {{Internet Engineering Task Force}},
	type = {Request for {Comments}},
	number = {3954},
	url = {https://tools.ietf.org/html/rfc3954},
	language = {en},
	urldate = {2018-06-19},
	month = oct,
	year = {2004},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\VVTNKBPI\\rfc3954.html:text/html}
}

@inproceedings{sivaraman_heavy-hitter_2017,
	title = {Heavy-{Hitter} {Detection} {Entirely} in the {Data} {Plane}},
	isbn = {978-1-4503-4947-5},
	doi = {10.1145/3050220.3063772},
	abstract = {Identifying the "heavy hitter" flows or flows with large traffic volumes in the data plane is important for several applications e.g., flow-size aware routing, DoS detection, and traffic engineering. However, measurement in the data plane is constrained by the need for line-rate processing (at 10-100Gb/s) and limited memory in switching hardware. We propose HashPipe, a heavy hitter detection algorithm using emerging programmable data planes. HashPipe implements a pipeline of hash tables which retain counters for heavy flows while evicting lighter flows over time. We prototype HashPipe in P4 and evaluate it with packet traces from an ISP backbone link and a data center. On the ISP trace (which contains over 400,000 flows), we find that HashPipe identifies 95\% of the 300 heaviest flows with less than 80KB of memory.},
	urldate = {2018-06-19},
	booktitle = {Proceedings of the {Symposium} on {SDN} {Research}},
	publisher = {ACM},
	author = {Sivaraman, Vibhaalakshmi and Narayana, Srinivas and Rottenstreich, Ori and Muthukrishnan, S. and Rexford, Jennifer},
	year = {2017},
	keywords = {Network Algorithms, Network Monitoring, Programmable Networks, Software-Defined Networks},
	pages = {164--176},
	file = {Sivaraman et al_2017_Heavy-Hitter Detection Entirely in the Data Plane.pdf:C\:\\Users\\zzy\\Zotero\\storage\\SNDCWSIH\\Sivaraman et al_2017_Heavy-Hitter Detection Entirely in the Data Plane.pdf:application/pdf}
}

@inproceedings{sivaraman_dc.p4:_2015,
	author = {Sivaraman, Anirudh and Kim, Changhoon and Krishnamoorthy, Ramkumar and Dixit, Advait and Budiu, Mihai},
	title = {{DC}.{P}4: {Programming} the {Forwarding} {Plane} of a {Data}-center {Switch}},
	shorttitle = {{DC}.{P}4},
	booktitle = {Proceedings of the 1st {ACM} {SIGCOMM} {Symposium} on {Software} {Defined} {Networking} {Research}},
	series = {{SOSR} '15},
	publisher = {ACM},
	address = {New York, NY, USA},
	year = {2015},
	pages = {2:1--2:8},
	isbn = {978-1-4503-3451-8},
	doi = {10.1145/2774993.2775007},
	abstract = {The P4 programming language [29, 16] has been recently proposed as a high-level language to program the forwarding plane of programmable packet processors, spanning the spectrum from software switches through FPGAs, NPUs and reconfigurable hardware switches. This paper presents a case study of using P4 to express the forwarding plane behavior of a datacenter switch, comparable in functionality to single-chip shared-memory switches found in many datacenters today. This case study allows us to understand how specific P4 constructs were useful in modeling specific datacenter switch features. We also outline additional language constructs that needed to be added to P4 to support certain features of a datacenter switch. We discuss several lessons that we learned in the process and distill these into a proposal for how P4 could evolve in the future.},
	keywords = {datacenter switch, language design, programmable forwarding planes},
	urldate = {2018-06-22},
	file = {Sivaraman et al_2015_DC.pdf:C\:\\Users\\zzy\\Zotero\\storage\\XHLVHRS2\\Sivaraman et al_2015_DC.pdf:application/pdf}
}

@inproceedings{bosshart_forwarding_2013,
	title = {Forwarding metamorphosis: {Fast} programmable match-action processing in hardware for {SDN}},
	volume = {43},
	shorttitle = {Forwarding metamorphosis},
	doi = {10.1145/2486001.2486011},
	booktitle = {{ACM} {SIGCOMM} {Computer} {Communication} {Review}},
	publisher = {ACM},
	author = {Bosshart, Pat and Gibb, Glen and Kim, Hun-Seok and Varghese, George and McKeown, Nick and Izzard, Martin and Mujica, Fernando and Horowitz, Mark},
	year = {2013},
	pages = {99--110},
	file = {Bosshart et al_2013_Forwarding metamorphosis.pdf:C\:\\Users\\zzy\\Zotero\\storage\\DBBFER5G\\Bosshart et al_2013_Forwarding metamorphosis.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\46IURGC2\\citation.html:text/html}
}

@inproceedings{sivaraman_packet_2016,
	title = {Packet transactions: {High}-level programming for line-rate switches},
	shorttitle = {Packet transactions},
	doi = {10.1145/2934872.2934900},
	booktitle = {Proceedings of the 2016 {ACM} {SIGCOMM} {Conference}},
	publisher = {ACM},
	author = {Sivaraman, Anirudh and Cheung, Alvin and Budiu, Mihai and Kim, Changhoon and Alizadeh, Mohammad and Balakrishnan, Hari and Varghese, George and McKeown, Nick and Licking, Steve},
	year = {2016},
	pages = {15--28},
	file = {Sivaraman et al_2016_Packet transactions.pdf:C\:\\Users\\zzy\\Zotero\\storage\\ZWG2TY3D\\Sivaraman et al_2016_Packet transactions.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\3MPE6L34\\citation.html:text/html}
}

@misc{noauthor_bloom_2018,
	title = {Bloom {Filter}},
	copyright = {Creative Commons Attribution-ShareAlike License},
	url = {https://en.wikipedia.org/w/index.php?title=Bloom_filter&oldid=846502261},
	abstract = {A Bloom filter is a space-efficient probabilistic data structure, conceived by Burton Howard Bloom in 1970, that is used to test whether an element is a member of a set. False positive matches are possible, but false negatives are not – in other words, a query returns either "possibly in set" or "definitely not in set". Elements can be added to the set, but not removed (though this can be addressed with a "counting" filter); the more elements that are added to the set, the larger the probability of false positives.
Bloom proposed the technique for applications where the amount of source data would require an impractically large amount of memory if "conventional" error-free hashing techniques were applied. He gave the example of a hyphenation algorithm for a dictionary of 500,000 words, out of which 90\% follow simple hyphenation rules, but the remaining 10\% require expensive disk accesses to retrieve specific hyphenation patterns. With sufficient core memory, an error-free hash could be used to eliminate all unnecessary disk accesses; on the other hand, with limited core memory, Bloom's technique uses a smaller hash area but still eliminates most unnecessary accesses. For example, a hash area only 15\% of the size needed by an ideal error-free hash still eliminates 85\% of the disk accesses.
More generally, fewer than 10 bits per element are required for a 1\% false positive probability, independent of the size or number of elements in the set.},
	language = {en},
	urldate = {2018-06-29},
	howpublished = {Wikipedia},
	month = jun,
	year = {2018},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\UUJVAXIJ\\index.html:text/html}
}

@misc{noauthor_bmv2:_2018,
	title = {bmv2: {P}4 {Software} {Switch}},
	shorttitle = {behavioral-model},
	url = {https://github.com/p4lang/behavioral-model},
	urldate = {2018-06-29},
	publisher = {p4language},
	year = {2018},
	copyright = {Apache-2.0}
}

@article{chen_counter_2017,
	title = {Counter {Tree}: {A} {Scalable} {Counter} {Architecture} for {Per}-{Flow} {Traffic} {Measurement}},
	volume = {25},
	shorttitle = {Counter {Tree}},
	number = {2},
	journal = {IEEE/ACM Transactions on Networking},
	author = {Chen, Min and Chen, Shigang and Cai, Zhiping},
	year = {2017},
	keywords = {network traffic measurement},
	pages = {1249--1262},
	file = {Chen et al_2017_Counter tree.pdf:C\:\\Users\\zzy\\Zotero\\storage\\UTJGQTSA\\Chen et al_2017_Counter tree.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\H43AE2LH\\citation.html:text/html}
}

@book{wakeman_great_1986,
	author = {Wakeman, Frederic},
	title = {The {Great} {Enterprise}},
	publisher = {University of California Press},
	month = jan,
	year = {1986},
	isbn = {978-0-520-23518-2},
	url = {https://book.douban.com/subject/2334403/},
	urldate = {2018-07-05},
	keywords = {海外中国研究, 苦逼PHD党, 历史, 列文森中国研究书籍奖, 美国中国学研究, 未收录, 魏斐德, 中国},
	file = {Wakeman_1986_The Great Enterprise.pdf:C\:\\Users\\zzy\\Zotero\\storage\\SUJIYFGV\\Wakeman_1986_The Great Enterprise.pdf:application/pdf}
}

@inproceedings{miao_silkroad:_2017,
	series = {{SIGCOMM}'17},
	title = {{SilkRoad}: {Making} {Stateful} {Layer}-4 {Load} {Balancing} {Fast} and {Cheap} {Using} {Switching} {ASICs}},
	shorttitle = {{SilkRoad}},
	booktitle = {Proceedings of the {Conference} of the {ACM} {Special} {Interest} {Group} on {Data} {Communication}},
	publisher = {ACM},
	author = {Miao, Rui and Zeng, Hongyi and Kim, Changhoon and Lee, Jeongkeun and Yu, Minlan},
	year = {2017},
	pages = {15--28},
	file = {Miao et al_2017_SilkRoad.pdf:C\:\\Users\\zzy\\Zotero\\storage\\IA5LBDAK\\Miao et al_2017_SilkRoad.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\A5SMJ8IL\\citation.html:text/html}
}

@article{khan_streaming_2014,
	title = {Streaming solutions for fine-grained network traffic measurements and analysis},
	volume = {22},
	number = {2},
	journal = {IEEE/ACM Transactions on Networking},
	author = {Khan, Faisal and Hosein, Nicholas and Ghiasi, Soheil and Chuah, Chen-Nee and Sharma, Puneet},
	year = {2014},
	pages = {377--390},
	file = {Khan et al_2014_Streaming solutions for fine-grained network traffic measurements and analysis.pdf:C\:\\Users\\zzy\\Zotero\\storage\\7T4SNBCX\\Khan et al_2014_Streaming solutions for fine-grained network traffic measurements and analysis.pdf:application/pdf;Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\YDUZS3KX\\citation.html:text/html}
}

@misc{noauthor_access_nodate,
	title = {Access {Time} of {DRAM} and {SRAM}},
	url = {https://www.webopedia.com/TERM/A/access_time.html},
	urldate = {2018-07-06},
	howpublished = {Webopedia},
	file = {What is Access Time? Webopedia Definition:C\:\\Users\\zzy\\Zotero\\storage\\IHV3ZWU5\\access_time.html:text/html}
}

@misc{noauthor_definition_2018,
	title = {Definition of {F}1 {Score}},
	copyright = {Creative Commons Attribution-ShareAlike License},
	url = {https://en.wikipedia.org/w/index.php?title=F1_score&oldid=841695054},
	abstract = {In statistical analysis of binary classification, the F1 score (also F-score or F-measure) is a measure of a test's accuracy. It considers both the precision p and the recall r of the test to compute the score: p is the number of correct positive results divided by the number of all positive results returned by the classifier, and r is the number of correct positive results divided by the number of all relevant samples (all samples that should have been identified as positive). The F1 score is the harmonic average of the precision and recall, where an F1 score reaches its best value at 1 (perfect precision and recall) and worst at 0.},
	language = {en},
	urldate = {2018-07-08},
	howpublished = {Wikipedia},
	month = may,
	year = {2018},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\8KDP9S3L\\index.html:text/html}
}

@inproceedings{chen_dynamic_2017,
	series = {{ICNP}'17},
	title = {The {Dynamic} {Cuckoo} {Filter}},
	doi = {10.1109/ICNP.2017.8117563},
	abstract = {The emergence of large-scale dynamic sets in real applications creates stringent requirements for approximate set representation structures: 1) the capacity of the set representation structures should support flexibly extending or reducing to cope with dynamically changing of set size; 2) the set representation structures should support reliable delete operation. Existing techniques for approximate set representation, e.g., the cuckoo filter, the Bloom filter and its variants cannot meet both the requirements of a dynamic set. To solve the problem, in this paper we propose the dynamic cuckoo filter (DCF) to support reliable delete operation and elastic capacity for dynamic set representation and membership testing. Two factors contribute to the efficiency of the DCF design. First, the data structure of a DCF is extendable, making the representation of a dynamic set space efficient. Second, a DCF utilizes a monopolistic fingerprint for representing an item and guarantees reliable delete operation. Experiment results show that compared to the existing state-of-the-art designs, DCF achieves 75\% reduction in memory cost, 50\% improvement in construction speed, and 80\% improvement in speed of membership query. We implement a prototype file backup system and use DCF for data deduplication. Comprehensive experiment results demonstrate the efficiency of our DCF design compared to existing schemes.},
	booktitle = {2017 {IEEE} 25th {International} {Conference} on {Network} {Protocols} ({ICNP})},
	author = {Chen, H. and Liao, L. and Jin, H. and Wu, J.},
	month = oct,
	year = {2017},
	keywords = {data structures, Arrays, Testing, Encoding, approximate set representation structures, Bloom filter, cuckoo filter, data deduplication, DCF design, dynamic cuckoo filter, dynamic set representation, large-scale dynamic sets, membership testing, monopolistic fingerprint, prototype file backup system, Prototypes, Reliability, reliable delete operation, set membership testing, Upper bound},
	pages = {1--10},
	file = {Chen et al_2017_The dynamic cuckoo filter.pdf:C\:\\Users\\zzy\\Zotero\\storage\\JTAAJ4A8\\Chen et al_2017_The dynamic cuckoo filter.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\4M2VQQ7Q\\8117563.html:text/html}
}

@inproceedings{gong_heavykeeper:_2018,
	series = {{ATC}'18},
	title = {{HeavyKeeper}: {An} {Accurate} {Algorithm} for {Finding} {Top}-k {Elephant} {Flows}},
	abstract = {1 Finding top-k elephant ﬂows is a critical task in network trafﬁc measurement, with many applications in congestion control, anomaly detection and trafﬁc engineering. As the line rates keep increasing in today’s networks, designing accurate and fast algorithms for online identiﬁcation of elephant ﬂows becomes more and more challenging. The prior algorithms are seriously limited in achieving accuracy under the constraints of heavy trafﬁc and small on-chip memory in use. We observe that the basic strategies adopted by these algorithms either require signiﬁcant space overhead to measure the sizes of all ﬂows or incur signiﬁcant inaccuracy when deciding which ﬂows to keep track of. In this paper, we adopt a new strategy, called count-with-exponential-decay, to achieve space-accuracy balance by actively removing small ﬂows through decaying, while minimizing the impact on large ﬂows, so as to achieve high precision in ﬁnding top-k elephant ﬂows. Moreover, the proposed algorithm called HeavyKeeper incurs small, constant processing overhead per packet and thus supports high line rates. Experimental results show that HeavyKeeper algorithm achieves 99.99\% precision with a small memory size, and reduces the error by around 3 orders of magnitude on average compared to the state-of-the-art.},
	language = {en},
	booktitle = {2018 {USENIX} {Annual} {Technical} {Conference}},
	author = {Gong, Junzhi and Yang, Tong and Zhang, Haowei and Li, Hao and Uhlig, Steve and Chen, Shigang and Uden, Lorna and Li, Xiaoming},
	year = {2018},
	pages = {909--921},
	file = {Gong et al. - HeavyKeeper An Accurate Algorithm for Finding Top.pdf:C\:\\Users\\zzy\\Zotero\\storage\\425CY3ES\\Gong et al. - HeavyKeeper An Accurate Algorithm for Finding Top.pdf:application/pdf}
}

@misc{noauthor_--_2018,
	title = {影响世界的治疗口吃的书籍-施瓦兹博士-不再口吃},
	language = {zh},
	year = {2018},
	file = {影响世界的治疗口吃的书籍-施瓦兹博士-不再口吃.pdf:C\:\\Users\\zzy\\Zotero\\storage\\5NTKPWX4\\影响世界的治疗口吃的书籍-施瓦兹博士-不再口吃.pdf:application/pdf}
}

@inproceedings{roy_inside_2015,
	author = {Roy, Arjun and Zeng, Hongyi and Bagga, Jasmeet and Porter, George and Snoeren, Alex C.},
	title = {Inside the {Social} {Network}'s ({Datacenter}) {Network}},
	booktitle = {Proceedings of the 2015 {ACM} {Conference} on {Special} {Interest} {Group} on {Data} {Communication}},
	series = {{SIGCOMM} '15},
	year = {2015},
	pages = {123--137},
	isbn = {978-1-4503-3542-3},
	doi = {10.1145/2785956.2787472},
	abstract = {Large cloud service providers have invested in increasingly larger datacenters to house the computing infrastructure required to support their services. Accordingly, researchers and industry practitioners alike have focused a great deal of effort designing network fabrics to efficiently interconnect and manage the traffic within these datacenters in performant yet efficient fashions. Unfortunately, datacenter operators are generally reticent to share the actual requirements of their applications, making it challenging to evaluate the practicality of any particular design. Moreover, the limited large-scale workload information available in the literature has, for better or worse, heretofore largely been provided by a single datacenter operator whose use cases may not be widespread. In this work, we report upon the network traffic observed in some of Facebook's datacenters. While Facebook operates a number of traditional datacenter services like Hadoop, its core Web service and supporting cache infrastructure exhibit a number of behaviors that contrast with those reported in the literature. We report on the contrasting locality, stability, and predictability of network traffic in Facebook's datacenters, and comment on their implications for network architecture, traffic engineering, and switch design.},
	keywords = {datacenter, patterns, traffic},
	urldate = {2018-07-16},
	file = {Roy et al_2015_Inside the Social Network's (Datacenter) Network.pdf:C\:\\Users\\zzy\\Zotero\\storage\\4ZZYKRMX\\Roy et al_2015_Inside the Social Network's (Datacenter) Network.pdf:application/pdf}
}

@inproceedings{huang_you_2018,
	series = {{INFOCOM}'18},
	title = {You {Can} {Drop} but {You} {Can}'t {Hide}: {K}-persistent {Spread} {Estimation} in {High}-speed {Networks}},
	shorttitle = {You {Can} {Drop} but {You} {Can}'t {Hide}},
	booktitle = {{IEEE} {International} {Conference} on {Computer} {Communications}},
	author = {Huang, He and Sun, Yu-E. and Chen, Shigang and Tang, Shaojie and Han, Kai and Yuan, Jing and Yang, Wenjian},
	year = {2018},
	file = {Fulltext:C\:\\Users\\zzy\\Zotero\\storage\\X7T7EFLG\\Huang et al. - You Can Drop but You Can’t Hide K-persistent Spre.pdf:application/pdf;Huang et al_You Can Drop but You Can’t Hide.pdf:C\:\\Users\\zzy\\Zotero\\storage\\ZXJ793ZI\\Huang et al_You Can Drop but You Can’t Hide.pdf:application/pdf}
}

@misc{cisco_configuring_2018,
	title = {Configuring {NetFlow} and {NetFlow} {Data} {Export}},
	url = {http://www.cisco.com/en/US/docs/ios-xml/ios/netflow/configuration/15-2s/cfg-nflow-data-expt.html},
	language = {en},
	urldate = {2018-07-16},
	howpublished = {Cisco},
	author = {{Cisco}},
	year = {2018},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\HPGPTB2W\\cfg-nflow-data-expt.html:text/html}
}

@book{jorgensen_beejs_2016,
	title = {Beej's {Guide} to {Network} {Programming}},
	author = {Jorgensen, Beej},
	url = {https://beej.us/guide/bgnet/},
	year = {2016},
	file = {bgnet_USLetter_2.pdf:C\:\\Users\\zzy\\Zotero\\storage\\N8GQJS4V\\bgnet_USLetter_2.pdf:application/pdf}
}

@book{muthukrishnan_data_2005,
	title = {Data {Streams}: {Algorithms} and {Applications}},
	isbn = {978-1-933019-14-7},
	shorttitle = {Data {Streams}},
	url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=8186985},
	abstract = {Data stream algorithms as an active research agenda emerged only over the past few years, even though the concept of making few passes over the data for performing computations has been around since the early days of Automata Theory. The data stream agenda now pervades many branches of Computer Science including databases, networking, knowledge discovery and data mining, and hardware systems. Industry is in synch too, with Data Stream Management Systems (DSMSs) and special hardware to deal with data speeds. Even beyond Computer Science, data stream concerns are emerging in physics, atmospheric science and statistics. Data Streams: Algorithms and Applications focuses on the algorithmic foundations of data streaming. In the data stream scenario, input arrives very rapidly and there is limited memory to store the input. Algorithms have to work with one or few passes over the data, space less than linear in the input size or time significantly less than the input size. In the past few yea s, a new theory has emerged for reasoning about algorithms that work within these constraints on space, time and number of passes. Some of the methods rely on metric embeddings, pseudo-random computations, sparse approximation theory and communication complexity. The applications for this scenario include IP network traffic analysis, mining text message streams and processing massive data sets in general. Data Streams: Algorithms and Applications surveys the emerging area of algorithms for processing data streams and associated applications. An extensive bibliography with over 200 entries points the reader to further resources for exploration.},
	urldate = {2018-07-26},
	publisher = {Now Foundations and Trends},
	author = {Muthukrishnan, S.},
	year = {2005},
	doi = {10.1561/0400000002},
	keywords = {sketch and streaming algorithm},
	internal-note = {Review: possible duplicate of entry DataStreams2005 (same work, same DOI 10.1561/0400000002) -- consider consolidating},
	file = {IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\JUDISE73\\articleDetails.html:text/html;Muthukrishnan - 2005 - Data Streams Algorithms and Applications.pdf:C\:\\Users\\zzy\\Zotero\\storage\\NAUP3AFG\\Muthukrishnan - 2005 - Data Streams Algorithms and Applications.pdf:application/pdf}
}

@misc{noauthor_caida_nodate,
	title = {{CAIDA} {UCSD} {Anonymized} {Internet} {Traces} {Dataset} - 2018},
	url = {http://www.caida.org/data/passive/passive_dataset.xml},
	abstract = {CAIDA's passive traces dataset contains traces collected from high-speed monitors on a commercial backbone link. The data collection started in April 2008 and is ongoing. These data are useful for research on the characteristics of Internet traffic, including application breakdown, security events, geographic and topological distribution, flow volume and duration. For an overview of all traces see the trace statistics page)},
	urldate = {2018-07-27},
	howpublished = {CAIDA},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\4A52K2ZA\\passive_dataset.html:text/html}
}

@misc{noauthor_sampled_nodate,
	title = {Sampled {NetFlow}},
	url = {https://www.cisco.com/c/en/us/td/docs/ios/12_0s/feature/guide/12s_sanf.html},
	abstract = {Sampled NetFlow},
	language = {en},
	urldate = {2018-07-29},
	howpublished = {Cisco},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\8AZD3TNC\\12s_sanf.html:text/html}
}

@inproceedings{ben-basat_efficient_2018,
	series = {{ICNP}'18},
	title = {Efficient {Measurement} on {Programmable} {Switches} {Using} {Probabilistic} {Recirculation}},
	doi = {10.1109/ICNP.2018.00047},
	abstract = {Programmable network switches promise flexibility and high throughput, enabling applications such as load balancing and traffic engineering. Network measurement is a fundamental building block for such applications, including tasks such as the identification of heavy hitters (largest flows) or the detection of traffic changes. However, high-throughput packet processing architectures place certain limitations on the programming model, such as restricted branching, limited capability for memory access, and a limited number of processing stages. These limitations restrict the types of measurement algorithms that can run on programmable switches. In this paper, we focus on the RMT programmable high-throughput switch architecture, and carefully examine its constraints on designing measurement algorithms. We demonstrate our findings while solving the heavy hitter problem. We introduce PRECISION, an algorithm that uses Probabilistic Recirculation to find top flows on a programmable switch. By recirculating a small fraction of packets, PRECISION simplifies the access to stateful memory to conform with RMT limitations and achieves higher accuracy than previous heavy hitter detection algorithms that avoid recirculation. We also analyze the effect of each architectural constraint on the measurement accuracy and provide insights for measurement algorithm designers.},
	booktitle = {2018 {IEEE} 26th {International} {Conference} on {Network} {Protocols} ({ICNP})},
	author = {Ben-Basat, R. and Chen, X. and Einziger, G. and Rottenstreich, O.},
	month = sep,
	year = {2018},
	keywords = {Approximation algorithms, Computer architecture, computer networks, Frequency estimation, Heavy Hitter, heavy hitter problem, network measurement, packet processing architectures, Pipelines, probabilistic recirculation, probability, Programmable Data Plane, programmable network, programmable switch, Programming, Random access memory, resource allocation, RMT, RMT limitations, RMT programmable high-throughput switch architecture, telecommunication traffic, Throughput, traffic changes, traffic engineering},
	pages = {313--323},
	file = {Ben-Basat et al_2018_Efficient Measurement on Programmable Switches Using Probabilistic Recirculation.pdf:C\:\\Users\\zzy\\Zotero\\storage\\V3QW3Y7C\\Ben-Basat et al_2018_Efficient Measurement on Programmable Switches Using Probabilistic Recirculation.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\zzy\\Zotero\\storage\\26PNQGTL\\8526835.html:text/html}
}



@misc{noauthor_edgecore_nodate,
	title = {Edgecore {Networks}},
	url = {https://www.edge-core.com/productsInfo.php?cls=1&cls2=180&cls3=181&id=335},
	language = {en},
	urldate = {2019-01-08},
	howpublished = {Edgecore Networks},
	file = {Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\IXWYWL6G\\productsInfo.html:text/html}
}



@misc{zhao_hashflow_2018,
	author = {Zhao, Zongyi},
	title = {{HashFlow} {Implemented} in {P}4},
	month = dec,
	year = {2018},
	urldate = {2019-01-12}
}

@article{zhang_more_2017,
	author = {Zhang, Han and Shi, Xingang and Guo, Yingya and Wang, Zhiliang and Yin, Xia},
	title = {More {Load}, {More} {Differentiation} - {Let} {More} {Flows} {Finish} {Before} {Deadline} in {Data} {Center} {Networks}},
	journal = {Computer Networks},
	volume = {127},
	pages = {352--367},
	month = nov,
	year = {2017},
	issn = {1389-1286},
	doi = {10.1016/j.comnet.2017.08.020},
	keywords = {Congestion, Data center, Deadline, Flow completion time, Rate control},
	urldate = {2019-04-02},
	file = {ScienceDirect Snapshot:C\:\\Users\\zzy\\Zotero\\storage\\DBEBBKRB\\S138912861730333X.html:text/html;Zhang et al_2017_More load, more differentiation — Let more flows finish before deadline in data.pdf:C\:\\Users\\zzy\\Zotero\\storage\\K35E7GGC\\Zhang et al_2017_More load, more differentiation — Let more flows finish before deadline in data.pdf:application/pdf}
}


@inproceedings{zhang_more_2015,
	title = {More {Load}, {More} {Differentiation} - {A} {Design} {Principle} for {Deadline}-{Aware} {Congestion} {Control}},
	doi = {10.1109/INFOCOM.2015.7218375},
	booktitle = {2015 {IEEE} {Conference} on {Computer} {Communications} ({INFOCOM})},
	author = {Zhang, Han and Shi, Xingang and Yin, Xia and Ren, Fengyuan and Wang, Zhiliang},
	month = apr,
	year = {2015},
	pages = {127--135}
}


@article{wang_efficient_2019,
	title = {Efficient {Scheduling} of {Weighted} {Coflows} in {Data} {Centers}},
	issn = {1045-9219},
	doi = {10.1109/TPDS.2019.2905560},
	journal = {IEEE Transactions on Parallel and Distributed Systems},
	author = {Wang, Z. and Zhang, H. and Shi, X. and Geng, H. and Li, Y. and Yin, X. and Liu, J. and Wu, Q.},
	year = {2019},
	keywords = {Approximation algorithms, CCT WCCT, Coflow, Data centers, DataCenter, Minimization, Schedules, Scheduling, Scheduling algorithms, Weight},
	note = {Early access}
}